Index: test/CodeGen/AMDGPU/commute_modifiers.ll
===================================================================
--- test/CodeGen/AMDGPU/commute_modifiers.ll
+++ test/CodeGen/AMDGPU/commute_modifiers.ll
@@ -72,8 +72,8 @@
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
+ %x = load volatile float, float addrspace(1)* %gep.0
+ %y = load volatile float, float addrspace(1)* %gep.1
  %y.fabs = call float @llvm.fabs.f32(float %y) #1
  %z = fadd float %x, %y.fabs
  store float %z, float addrspace(1)* %out
@@ -89,8 +89,8 @@
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
+ %x = load volatile float, float addrspace(1)* %gep.0
+ %y = load volatile float, float addrspace(1)* %gep.1
  %y.fneg = fsub float -0.000000e+00, %y
  %z = fmul float %x, %y.fneg
  store float %z, float addrspace(1)* %out
@@ -106,8 +106,8 @@
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
+ %x = load volatile float, float addrspace(1)* %gep.0
+ %y = load volatile float, float addrspace(1)* %gep.1
  %y.fabs = call float @llvm.fabs.f32(float %y) #1
  %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
  %z = fmul float %x, %y.fabs.fneg
@@ -125,8 +125,8 @@
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
+ %x = load volatile float, float addrspace(1)* %gep.0
+ %y = load volatile float, float addrspace(1)* %gep.1
  %x.fabs = call float @llvm.fabs.f32(float %x) #1
  %y.fabs = call float @llvm.fabs.f32(float %y) #1
  %z = fmul float %x.fabs, %y.fabs
@@ -143,8 +143,8 @@
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
+ %x = load volatile float, float addrspace(1)* %gep.0
+ %y = load volatile float, float addrspace(1)* %gep.1
  %x.fabs = call float @llvm.fabs.f32(float %x) #1
  %y.fabs = call float @llvm.fabs.f32(float %y) #1
  %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
@@ -167,8 +167,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
+ %r1 = load volatile float, float addrspace(1)* %gep.0
+ %r2 = load volatile float, float addrspace(1)* %gep.1
  %r2.fabs = call float @llvm.fabs.f32(float %r2)
Index: test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll
===================================================================
--- test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll
+++ test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll
@@ -15,16 +15,16 @@
  %offset = sext i32 %id to i64
  %offset0 = add i64 %offset, 1027
  %ptr0 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset0
- store i32 3, i32 addrspace(1)* %ptr0
+ store volatile i32 3, i32 addrspace(1)* %ptr0
  %offset1 = add i64 %offset, 1026
  %ptr1 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset1
- store i32 2, i32 addrspace(1)* %ptr1
+ store volatile i32 2, i32 addrspace(1)* %ptr1
  %offset2 = add i64 %offset, 1025
  %ptr2 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset2
- store i32 1, i32 addrspace(1)* %ptr2
+ store volatile i32 1, i32 addrspace(1)* %ptr2
  %offset3 = add i64 %offset, 1024
  %ptr3 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset3
- store i32 0, i32 addrspace(1)* %ptr3
+ store volatile i32 0, i32 addrspace(1)* %ptr3
  ret void
 }
Index: test/CodeGen/AMDGPU/ds_write2.ll
===================================================================
--- test/CodeGen/AMDGPU/ds_write2.ll
+++ test/CodeGen/AMDGPU/ds_write2.ll
@@ -31,8 +31,8 @@
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float, float addrspace(1)* %in.gep.0, align 4
- %val1 = load float, float addrspace(1)* %in.gep.1, align 4
+ %val0 = load volatile float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load volatile float, float addrspace(1)* %in.gep.1, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
@@ -50,8 +50,8 @@
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
- %val0 = load float, float addrspace(1)* %in0.gep, align 4
- %val1 = load float, float addrspace(1)* %in1.gep, align 4
+ %val0 = load volatile float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load volatile float, float addrspace(1)* %in1.gep, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
@@ -69,8 +69,8 @@
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
- %val0 = load float, float addrspace(1)* %in0.gep, align 4
- %val1 = load float, float addrspace(1)* %in1.gep, align 4
+ %val0 = load volatile float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load volatile float, float addrspace(1)* %in1.gep, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
@@ -90,8 +90,8 @@
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1
- %val0 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
- %val1 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.1, align 8
+ %val0 = load volatile <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
+ %val1 = load volatile <2 x float>, <2 x float> addrspace(1)* %in.gep.1, align 8
  %val0.0 = extractelement <2 x float> %val0, i32 0
  %val1.1 = extractelement <2 x float> %val1, i32 1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -150,8 +150,8 @@
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float, float addrspace(1)* %in.gep.0, align 4
- %val1 = load float, float addrspace(1)* %in.gep.1, align 4
+ %val0 = load volatile float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load volatile float, float addrspace(1)* %in.gep.1, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 255
@@ -310,8 +310,8 @@
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
- %val0 = load double, double addrspace(1)* %in.gep.0, align 8
- %val1 = load double, double addrspace(1)* %in.gep.1, align 8
+ %val0 = load volatile double, double addrspace(1)* %in.gep.0, align 8
+ %val1 = load volatile double, double addrspace(1)* %in.gep.1, align 8
  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
  store double %val0, double addrspace(3)* %arrayidx0, align 8
  %add.x = add nsw i32 %x.i, 8
Index: test/CodeGen/AMDGPU/ds_write2st64.ll
===================================================================
--- test/CodeGen/AMDGPU/ds_write2st64.ll
+++ test/CodeGen/AMDGPU/ds_write2st64.ll
@@ -29,8 +29,8 @@
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float, float addrspace(1)* %in.gep.0, align 4
- %val1 = load float, float addrspace(1)* %in.gep.1, align 4
+ %val0 = load volatile float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load volatile float, float addrspace(1)* %in.gep.1, align 4
  %add.x.0 = add nsw i32 %x.i, 128
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
  store float %val0, float addrspace(3)* %arrayidx0, align 4
@@ -50,8 +50,8 @@
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float, float addrspace(1)* %in.gep.0, align 4
- %val1 = load float, float addrspace(1)* %in.gep.1, align 4
+ %val0 = load volatile float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load volatile float, float addrspace(1)* %in.gep.1, align 4
  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 16320
@@ -70,8 +70,8 @@
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
- %val0 = load double, double addrspace(1)* %in.gep.0, align 8
- %val1 = load double, double addrspace(1)* %in.gep.1, align 8
+ %val0 = load volatile double, double addrspace(1)* %in.gep.0, align 8
+ %val1 = load volatile double, double addrspace(1)* %in.gep.1, align 8
  %add.x.0 = add nsw i32 %x.i, 256
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  store double %val0, double addrspace(3)* %arrayidx0, align 8
Index: test/CodeGen/AMDGPU/fdiv.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/fdiv.f64.ll
+++ test/CodeGen/AMDGPU/fdiv.f64.ll
@@ -31,8 +31,8 @@
; COMMON: s_endpgm
define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounwind {
  %gep.1 = getelementptr double, double addrspace(1)* %in, i32 1
- %num = load double, double addrspace(1)* %in
- %den = load double, double addrspace(1)* %gep.1
+ %num = load volatile double, double addrspace(1)* %in
+ %den = load volatile double, double addrspace(1)* %gep.1
  %result = fdiv double %num, %den
  store double %result, double addrspace(1)* %out
  ret void
Index: test/CodeGen/AMDGPU/fma-combine.ll
===================================================================
--- test/CodeGen/AMDGPU/fma-combine.ll
+++ test/CodeGen/AMDGPU/fma-combine.ll
@@ -20,9 +20,9 @@
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %a = load double, double addrspace(1)* %gep.0
- %b = load double, double addrspace(1)* %gep.1
- %c = load double, double addrspace(1)* %gep.2
+ %a = load volatile double, double addrspace(1)* %gep.0
+ %b = load volatile double, double addrspace(1)* %gep.1
+ %c = load volatile double, double addrspace(1)* %gep.2
  %mul = fmul double %a, %b
  %fma = fadd double %mul, %c
@@ -50,16 +50,16 @@
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
- %a = load double, double addrspace(1)* %gep.0
- %b = load double, double addrspace(1)* %gep.1
- %c = load double, double addrspace(1)* %gep.2
- %d = load double, double addrspace(1)* %gep.3
+ %a = load volatile double, double addrspace(1)* %gep.0
+ %b = load volatile double, double addrspace(1)* %gep.1
+ %c = load volatile double, double addrspace(1)* %gep.2
+ %d = load volatile double, double addrspace(1)* %gep.3
  %mul = fmul double %a, %b
  %fma0 = fadd double %mul, %c
  %fma1 = fadd double %mul, %d
- store double %fma0, double addrspace(1)* %gep.out.0
- store double %fma1, double addrspace(1)* %gep.out.1
+ store volatile double %fma0, double addrspace(1)* %gep.out.0
+ store volatile double %fma1, double addrspace(1)* %gep.out.1
  ret void
 }
@@ -77,9 +77,9 @@
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %a = load double, double addrspace(1)* %gep.0
- %b = load double, double addrspace(1)* %gep.1
- %c = load double, double addrspace(1)* %gep.2
+ %a = load volatile double, double addrspace(1)* %gep.0
+ %b = load volatile double, double addrspace(1)* %gep.1
+ %c = load volatile double, double addrspace(1)* %gep.2
  %mul = fmul double %a, %b
  %fma = fadd double %c, %mul
@@ -101,9 +101,9 @@
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %a = load double, double addrspace(1)* %gep.0
- %b = load double, double addrspace(1)* %gep.1
- %c = load double, double addrspace(1)* %gep.2
+ %a = load volatile double, double addrspace(1)* %gep.0
+ %b = load volatile double, double addrspace(1)* %gep.1
+ %c = load volatile double, double addrspace(1)* %gep.2
  %mul = fmul double %a, %b
  %fma = fsub double %mul, %c
@@ -131,16 +131,16 @@
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
- %a = load double, double addrspace(1)* %gep.0
- %b = load double, double addrspace(1)* %gep.1
- %c = load double, double addrspace(1)* %gep.2
- %d = load double, double addrspace(1)* %gep.3
+ %a = load volatile double, double addrspace(1)* %gep.0
+ %b = load volatile double, double addrspace(1)* %gep.1
+ %c = load volatile double, double addrspace(1)* %gep.2
+ %d = load volatile double, double addrspace(1)* %gep.3
  %mul = fmul double %a, %b
  %fma0 = fsub double %mul, %c
  %fma1 = fsub double %mul, %d
- store double %fma0, double addrspace(1)* %gep.out.0
- store double %fma1, double addrspace(1)* %gep.out.1
+ store volatile double %fma0, double addrspace(1)* %gep.out.0
+ store volatile double %fma1, double addrspace(1)* %gep.out.1
  ret void
 }
@@ -158,9 +158,9 @@
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %a = load double, double addrspace(1)* %gep.0
- %b = load double, double addrspace(1)* %gep.1
- %c = load double, double addrspace(1)* %gep.2
+ %a = load volatile double, double addrspace(1)* %gep.0
+ %b = load volatile double, double addrspace(1)* %gep.1
+ %c = load volatile double, double addrspace(1)* %gep.2
  %mul = fmul double %a, %b
  %fma = fsub double %c, %mul
@@ -188,16 +188,16 @@
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
- %a = load double, double addrspace(1)* %gep.0
- %b = load double, double addrspace(1)* %gep.1
- %c = load double, double addrspace(1)* %gep.2
- %d = load double, double addrspace(1)* %gep.3
+ %a = load volatile double, double addrspace(1)* %gep.0
+ %b = load volatile double, double addrspace(1)* %gep.1
+ %c = load volatile double, double addrspace(1)* %gep.2
+ %d = load volatile double, double addrspace(1)* %gep.3
  %mul = fmul double %a, %b
  %fma0 = fsub double %c, %mul
  %fma1 = fsub double %d, %mul
- store double %fma0, double addrspace(1)* %gep.out.0
- store double %fma1, double addrspace(1)* %gep.out.1
+ store volatile double %fma0, double addrspace(1)* %gep.out.0
+ store volatile double %fma1, double addrspace(1)* %gep.out.1
  ret void
 }
@@ -215,9 +215,9 @@
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %a = load double, double addrspace(1)* %gep.0
- %b = load double, double addrspace(1)* %gep.1
- %c = load double, double addrspace(1)* %gep.2
+ %a = load volatile double, double addrspace(1)* %gep.0
+ %b = load volatile double, double addrspace(1)* %gep.1
+ %c = load volatile double, double addrspace(1)* %gep.2
  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
@@ -246,18 +246,18 @@
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
- %a = load double, double addrspace(1)* %gep.0
- %b = load double, double addrspace(1)* %gep.1
- %c = load double, double addrspace(1)* %gep.2
- %d = load double, double addrspace(1)* %gep.3
+ %a = load volatile double, double addrspace(1)* %gep.0
+ %b = load volatile double, double addrspace(1)* %gep.1
+ %c = load volatile double, double addrspace(1)* %gep.2
+ %d = load volatile double, double addrspace(1)* %gep.3
  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul.neg, %d
- store double %fma0, double addrspace(1)* %gep.out.0
- store double %fma1, double addrspace(1)* %gep.out.1
+ store volatile double %fma0, double addrspace(1)* %gep.out.0
+ store volatile double %fma1, double addrspace(1)* %gep.out.1
  ret void
 }
@@ -280,18 +280,18 @@
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
- %a = load double, double addrspace(1)* %gep.0
- %b = load double, double addrspace(1)* %gep.1
- %c = load double, double addrspace(1)* %gep.2
- %d = load double, double addrspace(1)* %gep.3
+ %a = load volatile double, double addrspace(1)* %gep.0
+ %b = load volatile double, double addrspace(1)* %gep.1
+ %c = load volatile double, double addrspace(1)* %gep.2
+ %d = load volatile double, double addrspace(1)* %gep.3
  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul, %d
- store double %fma0, double addrspace(1)* %gep.out.0
- store double %fma1, double addrspace(1)* %gep.out.1
+ store volatile double %fma0, double addrspace(1)* %gep.out.0
+ store volatile double %fma1, double addrspace(1)* %gep.out.1
  ret void
 }
@@ -315,11 +315,11 @@
  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %x = load double, double addrspace(1)* %gep.0
- %y = load double, double addrspace(1)* %gep.1
- %z = load double, double addrspace(1)* %gep.2
- %u = load double, double addrspace(1)* %gep.3
- %v = load double, double addrspace(1)* %gep.4
+ %x = load volatile double, double addrspace(1)* %gep.0
+ %y = load volatile double, double addrspace(1)* %gep.1
+ %z = load volatile double, double addrspace(1)* %gep.2
+ %u = load volatile double, double addrspace(1)* %gep.3
+ %v = load volatile double, double addrspace(1)* %gep.4
  %tmp0 = fmul double %u, %v
  %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
@@ -350,11 +350,11 @@
  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %x = load double, double addrspace(1)* %gep.0
- %y = load double, double addrspace(1)* %gep.1
- %z = load double, double addrspace(1)* %gep.2
- %u = load double, double addrspace(1)* %gep.3
- %v = load double, double addrspace(1)* %gep.4
+ %x = load volatile double, double addrspace(1)* %gep.0
+ %y = load volatile double, double addrspace(1)* %gep.1
+ %z = load volatile double, double addrspace(1)* %gep.2
+ %u = load volatile double, double addrspace(1)* %gep.3
+ %v = load volatile double, double addrspace(1)* %gep.4
  %tmp0 = fmul double %u, %v
  %tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
@@ -373,8 +373,8 @@
define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
- %x = load float, float addrspace(1)* %in1
- %y = load float, float addrspace(1)* %in2
+ %x = load volatile float, float addrspace(1)* %in1
+ %y = load volatile float, float addrspace(1)* %in2
  %a = fadd float %x, 1.0
  %m = fmul float %a, %y
  store float %m, float addrspace(1)* %out
@@ -386,8 +386,8 @@
define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
- %x = load float, float addrspace(1)* %in1
- %y = load float, float addrspace(1)* %in2
+ %x = load volatile float, float addrspace(1)* %in1
+ %y = load volatile float, float addrspace(1)* %in2
  %a = fadd float %x, 1.0
  %m = fmul float %y, %a
  store float %m, float addrspace(1)* %out
Index: test/CodeGen/AMDGPU/fmax3.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/fmax3.f64.ll
+++ test/CodeGen/AMDGPU/fmax3.f64.ll
@@ -14,9 +14,9 @@
define void @test_fmax3_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
  %bptr = getelementptr double, double addrspace(1)* %aptr, i32 1
  %cptr = getelementptr double, double addrspace(1)* %aptr, i32 2
- %a = load double, double addrspace(1)* %aptr, align 8
- %b = load double, double addrspace(1)* %bptr, align 8
- %c = load double, double addrspace(1)* %cptr, align 8
+ %a = load volatile double, double addrspace(1)* %aptr, align 8
+ %b = load volatile double, double addrspace(1)* %bptr, align 8
+ %c = load volatile double, double addrspace(1)* %cptr, align 8
  %f0 = call double @llvm.maxnum.f64(double %a, double %b) nounwind readnone
  %f1 = call double @llvm.maxnum.f64(double %f0, double %c) nounwind readnone
  store double %f1, double addrspace(1)* %out, align 8
Index: test/CodeGen/AMDGPU/fmax_legacy.ll
===================================================================
--- test/CodeGen/AMDGPU/fmax_legacy.ll
+++ test/CodeGen/AMDGPU/fmax_legacy.ll
@@ -18,8 +18,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %cmp = fcmp uge float %a, %b
  %val = select i1 %cmp, float %a, float %b
@@ -38,8 +38,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %cmp = fcmp oge float %a, %b
  %val = select i1 %cmp, float %a, float %b
@@ -58,8 +58,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %cmp = fcmp ugt float %a, %b
  %val = select i1 %cmp, float %a, float %b
@@ -78,8 +78,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %cmp = fcmp ogt float %a, %b
  %val = select i1 %cmp, float %a, float %b
@@ -142,8 +142,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %cmp = fcmp ogt float %a, %b
  %val = select i1 %cmp, float %a, float %b
Index: test/CodeGen/AMDGPU/fmin_legacy.ll
===================================================================
--- test/CodeGen/AMDGPU/fmin_legacy.ll
+++ test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -51,8 +51,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %cmp = fcmp ule float %a, %b
  %val = select i1 %cmp, float %a, float %b
@@ -70,8 +70,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %cmp = fcmp ole float %a, %b
  %val = select i1 %cmp, float %a, float %b
@@ -89,8 +89,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %cmp = fcmp olt float %a, %b
  %val = select i1 %cmp, float %a, float %b
@@ -108,8 +108,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %cmp = fcmp ult float %a, %b
  %val = select i1 %cmp, float %a, float %b
@@ -193,8 +193,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %cmp = fcmp ole float %a, %b
  %val0 = select i1 %cmp, float %a, float %b
Index: test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
===================================================================
--- test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -36,8 +36,8 @@
  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
  %mul2 = fmul fast float %x, 2.0
  %mad = fadd fast float %mul2, %y
- store float %mul2, float addrspace(1)* %out
- store float %mad, float addrspace(1)* %out.gep.1
+ store volatile float %mul2, float addrspace(1)* %out
+ store volatile float %mad, float addrspace(1)* %out.gep.1
  ret void
 }
@@ -52,8 +52,8 @@
  %x.abs = call float @llvm.fabs.f32(float %x)
  %mul2 = fmul fast float %x.abs, 2.0
  %mad = fadd fast float %mul2, %y
- store float %mul2, float addrspace(1)* %out
- store float %mad, float addrspace(1)* %out.gep.1
+ store volatile float %mul2, float addrspace(1)* %out
+ store volatile float %mad, float addrspace(1)* %out.gep.1
  ret void
 }
@@ -66,8 +66,8 @@
  %mul2 = fmul fast float %x.abs, 2.0
  %mad0 = fadd fast float %mul2, %y
  %mad1 = fadd fast float %mul2, %z
- store float %mad0, float addrspace(1)* %out
- store float %mad1, float addrspace(1)* %out.gep.1
+ store volatile float %mad0, float addrspace(1)* %out
+ store volatile float %mad1, float addrspace(1)* %out.gep.1
  ret void
 }
@@ -80,7 +80,7 @@
  %mul2 = fmul fast float %x, 2.0
  %muln2 = fmul fast float %x, -2.0
  %mul = fmul fast float %mul2, %muln2
- store float %mul, float addrspace(1)* %out
+ store volatile float %mul, float addrspace(1)* %out
  ret void
 }
@@ -94,7 +94,7 @@
  %mul2 = fmul fast float %x, 2.0
  %muln2 = fmul fast float %x, -3.0
  %mul = fmul fast float %mul2, %muln2
- store float %mul, float addrspace(1)* %out
+ store volatile float %mul, float addrspace(1)* %out
  ret void
 }
Index: test/CodeGen/AMDGPU/fmuladd.ll
===================================================================
--- test/CodeGen/AMDGPU/fmuladd.ll
+++ test/CodeGen/AMDGPU/fmuladd.ll
@@ -42,8 +42,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
+ %r1 = load volatile float, float addrspace(1)* %gep.0
+ %r2 = load volatile float, float addrspace(1)* %gep.1
  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
  store float %r3, float addrspace(1)* %gep.out
@@ -61,8 +61,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
+ %r1 = load volatile float, float addrspace(1)* %gep.0
+ %r2 = load volatile float, float addrspace(1)* %gep.1
  %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
  store float %r3, float addrspace(1)* %gep.out
@@ -82,8 +82,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r0 = load float, float addrspace(1)* %gep.0
- %r1 = load float, float addrspace(1)* %gep.1
+ %r0 = load volatile float, float addrspace(1)* %gep.0
+ %r1 = load volatile float, float addrspace(1)* %gep.1
  %add.0 = fadd float %r0, %r0
  %add.1 = fadd float %add.0, %r1
@@ -104,8 +104,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r0 = load float, float addrspace(1)* %gep.0
- %r1 = load float, float addrspace(1)* %gep.1
+ %r0 = load volatile float, float addrspace(1)* %gep.0
+ %r1 = load volatile float, float addrspace(1)* %gep.1
  %add.0 = fadd float %r0, %r0
  %add.1 = fadd float %r1, %add.0
@@ -124,8 +124,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
+ %r1 = load volatile float, float addrspace(1)* %gep.0
+ %r2 = load volatile float, float addrspace(1)* %gep.1
  %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
  store float %r3, float addrspace(1)* %gep.out
@@ -144,8 +144,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
+ %r1 = load volatile float, float addrspace(1)* %gep.0
+ %r2 = load volatile float, float addrspace(1)* %gep.1
  %r1.fneg = fsub float -0.000000e+00, %r1
@@ -166,8 +166,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
+ %r1 = load volatile float, float addrspace(1)* %gep.0
+ %r2 = load volatile float, float addrspace(1)* %gep.1
  %r1.fneg = fsub float -0.000000e+00, %r1
@@ -188,8 +188,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
+ %r1 = load volatile float, float addrspace(1)* %gep.0
+ %r2 = load volatile float, float addrspace(1)* %gep.1
  %r2.fneg = fsub float -0.000000e+00, %r2
Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
@@ -121,9 +121,9 @@
  %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 2
- %a = load float, float addrspace(1)* %gep.a
- %b = load float, float addrspace(1)* %gep.b
- %c = load float, float addrspace(1)* %gep.c
+ %a = load volatile float, float addrspace(1)* %gep.a
+ %b = load volatile float, float addrspace(1)* %gep.b
+ %c = load volatile float, float addrspace(1)* %gep.c
  %cmp0 = icmp eq i32 %tid, 0
  %cmp1 = icmp ne i32 %d, 0
Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
@@ -5,7 +5,7 @@
declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone
-; SI-LABEL @test_div_scale_f32_1:
+; SI-LABEL: {{^}}test_div_scale_f32_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
@@ -16,8 +16,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
@@ -25,7 +25,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_2:
+; SI-LABEL: {{^}}test_div_scale_f32_2:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
@@ -36,8 +36,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
@@ -45,7 +45,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f64_1:
+; SI-LABEL: {{^}}test_div_scale_f64_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
@@ -56,8 +56,8 @@
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double, double addrspace(1)* %gep.0, align 8
- %b = load double, double addrspace(1)* %gep.1, align 8
+ %a = load volatile double, double addrspace(1)* %gep.0, align 8
+ %b = load volatile double, double addrspace(1)* %gep.1, align 8
  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
@@ -65,7 +65,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f64_1:
+; SI-LABEL: {{^}}test_div_scale_f64_2:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
@@ -76,8 +76,8 @@
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double, double addrspace(1)* %gep.0, align 8
- %b = load double, double addrspace(1)* %gep.1, align 8
+ %a = load volatile double, double addrspace(1)* %gep.0, align 8
+ %b = load volatile double, double addrspace(1)* %gep.1, align 8
  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
@@ -85,7 +85,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_scalar_num_1:
+; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_1:
; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
; SI-DAG: s_load_dword [[A:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
@@ -103,7 +103,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_scalar_num_2:
+; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_2:
; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
; SI-DAG: s_load_dword [[A:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
@@ -121,7 +121,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_scalar_den_1:
+; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
; SI-DAG: s_load_dword [[B:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
@@ -139,7 +139,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_scalar_den_2:
+; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_2:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
; SI-DAG: s_load_dword [[B:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
@@ -157,7 +157,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f64_scalar_num_1:
+; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_1:
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
@@ -175,7 +175,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f64_scalar_num_2:
+; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_2:
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
@@ -193,7 +193,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f64_scalar_den_1:
+; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
@@ -211,7 +211,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f64_scalar_den_2:
+; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_2:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
@@ -229,7 +229,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_all_scalar_1:
+; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_1:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
@@ -243,7 +243,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_all_scalar_2:
+; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_2:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
@@ -257,7 +257,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f64_all_scalar_1:
+; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_1:
; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
@@ -272,7 +272,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f64_all_scalar_2:
+; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_2:
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
@@ -287,7 +287,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_inline_imm_num:
+; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
; SI: buffer_store_dword [[RESULT0]]
@@ -303,7 +303,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_inline_imm_den:
+; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
; SI: buffer_store_dword [[RESULT0]]
@@ -319,7 +319,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_fabs_num:
+; SI-LABEL: {{^}}test_div_scale_f32_fabs_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]|
@@ -330,8 +330,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
@@ -341,7 +341,7 @@
  ret void
 }
-; SI-LABEL @test_div_scale_f32_fabs_den:
+; SI-LABEL: {{^}}test_div_scale_f32_fabs_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]]
@@ -352,8 +352,8 @@
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
Index: test/CodeGen/AMDGPU/mad-combine.ll
===================================================================
--- test/CodeGen/AMDGPU/mad-combine.ll
+++ test/CodeGen/AMDGPU/mad-combine.ll
@@ -38,9 +38,9 @@
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
+ %a = load volatile float, float addrspace(1)* %gep.0
+ %b = load volatile float, float addrspace(1)* %gep.1
+ %c = load volatile float, float addrspace(1)* %gep.2
  %mul = fmul float %a, %b
  %fma = fadd float %mul, %c
@@ -79,17 +79,17 @@
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
- %d = load float, float addrspace(1)* %gep.3
+ %a = load volatile float, float addrspace(1)* %gep.0
+ %b = load volatile float, float addrspace(1)* %gep.1
+ %c = load volatile float, float addrspace(1)* %gep.2
+ %d = load volatile float, float addrspace(1)* %gep.3
  %mul = fmul float %a, %b
  %fma0 = fadd float %mul, %c
  %fma1 = fadd float %mul, %d
- store float %fma0, float addrspace(1)* %gep.out.0
- store float %fma1, float addrspace(1)* %gep.out.1
+ store volatile float %fma0, float addrspace(1)* %gep.out.0
+ store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
 }
@@ -114,9 +114,9 @@
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
+ %a = load volatile float, float addrspace(1)* %gep.0
+ %b = load volatile float, float addrspace(1)* %gep.1
+ %c = load volatile float, float addrspace(1)* %gep.2
  %mul = fmul float %a, %b
  %fma = fadd float %c, %mul
@@ -144,9 +144,9 @@
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
+ %a = load volatile float, float addrspace(1)* %gep.0
+ %b = load volatile float, float addrspace(1)* %gep.1
+ %c = load volatile float, float addrspace(1)* %gep.2
  %mul = fmul float %a, %b
  %fma = fsub float %mul, %c
@@ -183,16 +183,16 @@
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
- %d = load float, float addrspace(1)* %gep.3
+ %a = load volatile float, float addrspace(1)* %gep.0
+ %b = load volatile float, float addrspace(1)* %gep.1
+ %c = load volatile float, float addrspace(1)* %gep.2
+ %d = load volatile float, float addrspace(1)* %gep.3
  %mul = fmul float %a, %b
  %fma0 = fsub float %mul, %c
  %fma1 = fsub float %mul, %d
- store float %fma0, float addrspace(1)* %gep.out.0
- store float %fma1, float addrspace(1)* %gep.out.1
+ store volatile float %fma0, float addrspace(1)* %gep.out.0
+ store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
 }
@@ -216,9 +216,9 @@
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
+ %a = load volatile float, float addrspace(1)* %gep.0
+ %b = load volatile float, float addrspace(1)* %gep.1
+ %c = load volatile float, float addrspace(1)* %gep.2
  %mul = fmul float %a, %b
  %fma = fsub float %c, %mul
@@ -254,16 +254,16 @@
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
- %d = load float, float addrspace(1)* %gep.3
+ %a = load volatile float, float addrspace(1)* %gep.0
+ %b = load volatile float, float addrspace(1)* %gep.1
+ %c = load volatile float, float addrspace(1)* %gep.2
+ %d = load volatile float, float addrspace(1)* %gep.3
  %mul = fmul float %a, %b
  %fma0 = fsub float %c, %mul
  %fma1 = fsub float %d, %mul
- store float %fma0, float addrspace(1)* %gep.out.0
- store float %fma1, float addrspace(1)* %gep.out.1
+ store volatile float %fma0, float addrspace(1)* %gep.out.0
+ store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
 }
@@ -288,9 +288,9 @@
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
+ %a = load volatile float, float addrspace(1)* %gep.0
+ %b = load volatile float, float addrspace(1)* %gep.1
+ %c = load volatile float, float addrspace(1)* %gep.2
  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
@@ -328,18 +328,18 @@
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
- %d = load float, float addrspace(1)* %gep.3
+ %a = load volatile float, float addrspace(1)* %gep.0
+ %b = load volatile float, float addrspace(1)* %gep.1
+ %c = load volatile float, float addrspace(1)* %gep.2
+ %d = load volatile float, float addrspace(1)* %gep.3
  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma0 = fsub float %mul.neg, %c
  %fma1 = fsub float %mul.neg, %d
- store float %fma0, float addrspace(1)* %gep.out.0
- store float %fma1, float addrspace(1)* %gep.out.1
+ store volatile float %fma0, float addrspace(1)* %gep.out.0
+ store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
 }
@@ -371,18 +371,18 @@
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
- %d = load float, float addrspace(1)* %gep.3
+ %a = load volatile float, float addrspace(1)* %gep.0
+ %b = load volatile float, float addrspace(1)* %gep.1
+ %c = load volatile float, float addrspace(1)* %gep.2
+ %d = load volatile float, float addrspace(1)* %gep.3
  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma0 = fsub float %mul.neg, %c
  %fma1 = fsub float %mul, %d
- store float %fma0, float addrspace(1)* %gep.out.0
- store float %fma1, float addrspace(1)* %gep.out.1
+ store volatile float %fma0, float addrspace(1)* %gep.out.0
+ store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
 }
@@ -416,11 +416,11 @@
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
- %z = load float, float addrspace(1)* %gep.2
- %u = load float, float addrspace(1)* %gep.3
- %v = load float, float addrspace(1)* %gep.4
+ %x = load volatile float, float addrspace(1)* %gep.0
+ %y = load volatile float, float addrspace(1)* %gep.1
+ %z = load volatile float, float addrspace(1)* %gep.2
+ %u = load volatile float, float addrspace(1)* %gep.3
+ %v = load volatile float, float addrspace(1)* %gep.4
  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
@@ -462,11 +462,11 @@
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
- %z = load float, float addrspace(1)* %gep.2
- %u = load float, float addrspace(1)* %gep.3
- %v = load float, float addrspace(1)* %gep.4
+ %x = load volatile float, float addrspace(1)* %gep.0
+ %y = load volatile float, float addrspace(1)* %gep.1
+ %z = load volatile float, float addrspace(1)* %gep.2
+ %u = load volatile float, float addrspace(1)* %gep.3
+ %v = load volatile float, float addrspace(1)* %gep.4
  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0
@@ -508,11 +508,11 @@
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
- %z = load float, float addrspace(1)* %gep.2
- %u = load float, float addrspace(1)* %gep.3
- %v = load float, float addrspace(1)* %gep.4
+ %x = load volatile float, float addrspace(1)* %gep.0
+ %y = load volatile float, float addrspace(1)* %gep.1
+ %z = load volatile float, float addrspace(1)* %gep.2
+ %u = load volatile float, float addrspace(1)* %gep.3
+ %v = load volatile float, float addrspace(1)* %gep.4
  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
@@ -554,11 +554,11 @@
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
- %z = load float, float addrspace(1)* %gep.2
- %u = load float, float addrspace(1)* %gep.3
- %v = load float, float addrspace(1)* %gep.4
+ %x = load volatile float, float addrspace(1)* %gep.0
+ %y = load volatile float, float addrspace(1)* %gep.1
+ %z = load volatile float, float addrspace(1)* %gep.2
+ %u = load volatile float, float addrspace(1)* %gep.3
+ %v = load volatile float, float addrspace(1)* %gep.4
  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
Index: test/CodeGen/AMDGPU/mad-sub.ll
===================================================================
--- test/CodeGen/AMDGPU/mad-sub.ll
+++ test/CodeGen/AMDGPU/mad-sub.ll
@@ -18,9 +18,9 @@
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
+ %a = load volatile float, float addrspace(1)* %gep0, align 4
+ %b = load volatile float, float addrspace(1)* %gep1, align 4
+ %c = load volatile float, float addrspace(1)* %gep2, align 4
  %mul = fmul float %a, %b
  %sub = fsub float %mul, %c
  store float %sub, float addrspace(1)* %outgep, align 4
@@ -42,9 +42,9 @@
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
+ %a = load volatile float, float addrspace(1)* %gep0, align 4
+ %b = load volatile float, float addrspace(1)* %gep1, align 4
+ %c = load volatile float, float addrspace(1)* %gep2, align 4
  %mul = fmul float %a, %b
  %sub = fsub float %c, %mul
  store float %sub, float addrspace(1)* %outgep, align 4
@@ -63,9 +63,9 @@
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr double, double addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr double, double addrspace(1)* %out, i64 %tid.ext
- %a = load double, double addrspace(1)* %gep0, align 8
- %b = load double, double addrspace(1)* %gep1, align 8
- %c = load double, double addrspace(1)* %gep2, align 8
+ %a = load volatile double, double addrspace(1)* %gep0, align 8
+ %b = load volatile double, double addrspace(1)* %gep1, align 8
+ %c = load volatile double, double addrspace(1)* %gep2, align 8
  %mul = fmul double %a, %b
  %sub = fsub double %mul, %c
  store double %sub, double addrspace(1)* %outgep, align 8
@@ -87,9 +87,9 @@
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
+ %a = load volatile float, float addrspace(1)* %gep0, align 4
+ %b = load volatile float, float addrspace(1)* %gep1, align 4
+ %c = load volatile float, float addrspace(1)* %gep2, align 4
  %c.abs = call float @llvm.fabs.f32(float %c) #0
  %mul = fmul float %a, %b
  %sub = fsub float %mul, %c.abs
@@ -112,9 +112,9 @@
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
+ %a = load volatile float, float addrspace(1)* %gep0, align 4
+ %b = load volatile float, float addrspace(1)* %gep1, align 4
+ %c = load volatile float, float addrspace(1)* %gep2, align 4
  %c.abs = call float @llvm.fabs.f32(float %c) #0
  %mul = fmul float %a, %b
  %sub = fsub float %c.abs, %mul
@@ -133,9 +133,9 @@
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
+ %a = load volatile float, float addrspace(1)* %gep0, align 4
+ %b = load volatile float, float addrspace(1)* %gep1, align 4
+ %c = load volatile float, float addrspace(1)* %gep2, align 4
  %nega = fsub float -0.000000e+00, %a
  %negb = fsub float -0.000000e+00, %b
  %mul = fmul float %nega, %negb
@@ -159,9 +159,9 @@
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
+ %a = load volatile float, float addrspace(1)* %gep0, align 4
+ %b = load volatile float, float addrspace(1)* %gep1, align 4
+ %c = load volatile float, float addrspace(1)* %gep2, align 4
  %b.abs = call float @llvm.fabs.f32(float %b) #0
  %mul = fmul float %a, %b.abs
  %sub = fsub float %mul, %c
@@ -180,8 +180,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
+ %r1 = load volatile float, float addrspace(1)* %gep.0
+ %r2 = load volatile float, float addrspace(1)* %gep.1
  %add = fadd float %r1, %r1
  %r3 = fsub float %r2, %add
@@ -201,8 +201,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
+ %r1 = load volatile float, float addrspace(1)* %gep.0
+ %r2 = load volatile float, float addrspace(1)* %gep.1
  %add = fadd float %r1, %r1
  %r3 = fsub float %add, %r2
Index: test/CodeGen/AMDGPU/madmk.ll
===================================================================
--- test/CodeGen/AMDGPU/madmk.ll
+++ test/CodeGen/AMDGPU/madmk.ll
@@ -14,8 +14,8 @@
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
+ %a = load volatile float, float addrspace(1)* %gep.0, align 4
+ %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %mul = fmul float %a, 10.0
  %madmk = fadd float %mul, %b
@@ -41,9 +41,9 @@
  %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
%out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 - %a = load float, float addrspace(1)* %in.gep.0, align 4 - %b = load float, float addrspace(1)* %in.gep.1, align 4 - %c = load float, float addrspace(1)* %in.gep.2, align 4 + %a = load volatile float, float addrspace(1)* %in.gep.0, align 4 + %b = load volatile float, float addrspace(1)* %in.gep.1, align 4 + %c = load volatile float, float addrspace(1)* %in.gep.2, align 4 %mul0 = fmul float %a, 10.0 %mul1 = fmul float %a, 10.0 @@ -66,8 +66,8 @@ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load float, float addrspace(1)* %gep.0, align 4 - %b = load float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, float addrspace(1)* %gep.0, align 4 + %b = load volatile float, float addrspace(1)* %gep.1, align 4 %mul = fmul float %a, 4.0 %madmk = fadd float %mul, %b @@ -131,8 +131,8 @@ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load float, float addrspace(1)* %gep.0, align 4 - %b = load float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, float addrspace(1)* %gep.0, align 4 + %b = load volatile float, float addrspace(1)* %gep.1, align 4 %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone @@ -152,8 +152,8 @@ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load float, float addrspace(1)* %gep.0, align 4 - %b = load float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, float addrspace(1)* %gep.0, align 4 + %b = load volatile float, float addrspace(1)* %gep.1, align 4 %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone Index: test/CodeGen/AMDGPU/rsq.ll =================================================================== --- test/CodeGen/AMDGPU/rsq.ll +++ test/CodeGen/AMDGPU/rsq.ll @@ -62,9 +62,9 @@ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %a = load float, float addrspace(1)* %gep.0 - %b = load float, float addrspace(1)* %gep.1 - %c = load float, float addrspace(1)* %gep.2 + %a = load volatile float, float addrspace(1)* %gep.0 + %b = load volatile float, float addrspace(1)* %gep.1 + %c = load volatile float, float addrspace(1)* %gep.2 %x = call float @llvm.sqrt.f32(float %a) %y = fmul float %x, %b Index: test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll =================================================================== --- test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll +++ test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll @@ -8,8 +8,8 @@ define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %a = load i32, i32 addrspace(1)* %in - %b = load i32, i32 addrspace(1)* %b_ptr + %a = load volatile i32, i32 addrspace(1)* %in + %b = load volatile i32, i32 addrspace(1)* %b_ptr %result = add i32 %a, %b store i32 %result, i32 addrspace(1)* %out ret void Index: test/CodeGen/AMDGPU/v_mac.ll =================================================================== --- test/CodeGen/AMDGPU/v_mac.ll +++ test/CodeGen/AMDGPU/v_mac.ll @@ -12,9 +12,9 @@ %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2 - %a = load float, float addrspace(1)* %in 
- %b = load float, float addrspace(1)* %b_ptr - %c = load float, float addrspace(1)* %c_ptr + %a = load volatile float, float addrspace(1)* %in + %b = load volatile float, float addrspace(1)* %b_ptr + %c = load volatile float, float addrspace(1)* %c_ptr %tmp0 = fmul float %a, %b %tmp1 = fadd float %tmp0, %c @@ -71,11 +71,11 @@ %d_ptr = getelementptr float, float addrspace(1)* %in, i32 3 %e_ptr = getelementptr float, float addrspace(1)* %in, i32 4 - %a = load float, float addrspace(1)* %in - %b = load float, float addrspace(1)* %b_ptr - %c = load float, float addrspace(1)* %c_ptr - %d = load float, float addrspace(1)* %d_ptr - %e = load float, float addrspace(1)* %e_ptr + %a = load volatile float, float addrspace(1)* %in + %b = load volatile float, float addrspace(1)* %b_ptr + %c = load volatile float, float addrspace(1)* %c_ptr + %d = load volatile float, float addrspace(1)* %d_ptr + %e = load volatile float, float addrspace(1)* %e_ptr %tmp0 = fmul float %a, %b %tmp1 = fadd float %tmp0, %c