Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9783,12 +9783,11 @@
   // fold (conv (load x)) -> (load (conv*)x)
   // If the resultant load doesn't need a higher alignment than the original!
   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
-      // Do not change the width of a volatile load.
-      !cast<LoadSDNode>(N0)->isVolatile() &&
       // Do not remove the cast if the types differ in endian layout.
       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isOperationLegal(ISD::LOAD, VT)) &&
       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     unsigned OrigAlign = LN0->getAlignment();
Index: test/CodeGen/AMDGPU/copy-illegal-type.ll
===================================================================
--- test/CodeGen/AMDGPU/copy-illegal-type.ll
+++ test/CodeGen/AMDGPU/copy-illegal-type.ll
@@ -147,10 +147,7 @@
 }
 
 ; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
+; GCN: {{buffer|flat}}_load_dword
 ; GCN: buffer_store_dword
 ; GCN: s_endpgm
 define amdgpu_kernel void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
Index: test/CodeGen/Mips/cconv/return-hard-fp128.ll
===================================================================
--- test/CodeGen/Mips/cconv/return-hard-fp128.ll
+++ test/CodeGen/Mips/cconv/return-hard-fp128.ll
@@ -18,14 +18,10 @@
 }
 
 ; ALL-LABEL: retldouble:
-; N32-DAG: ld [[R2:\$[0-9]+]], %lo(fp128)([[R1:\$[0-9]+]])
+; N32-DAG: ldc1 $f0, %lo(fp128)([[R1:\$[0-9]+]])
 ; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(fp128)
-; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG: dmtc1 [[R2]], $f0
-; N32-DAG: dmtc1 [[R4]], $f2
+; N32-DAG: ldc1 $f2, 8([[R3]])
 
 ; N64-DAG: lui [[R2:\$[0-9]+]], %highest(fp128)
-; N64-DAG: ld [[R3:\$[0-9]+]], %lo(fp128)([[R2]])
-; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R2]])
-; N64-DAG: dmtc1 [[R3]], $f0
-; N64-DAG: dmtc1 [[R4]], $f2
+; N64-DAG: ldc1 $f0, %lo(fp128)([[R2]])
+; N64-DAG: ldc1 $f2, 8([[R2]])
Index: test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
===================================================================
--- test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
+++ test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
@@ -23,14 +23,10 @@
 ; is returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to
 ; match the de facto ABI as implemented by GCC.
 ; N32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_fp128)
-; N32-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N32-DAG: dmtc1 [[R2]], $f0
+; N32-DAG: ldc1 $f0, %lo(struct_fp128)([[R1]])
 ; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(struct_fp128)
-; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG: dmtc1 [[R4]], $f1
+; N32-DAG: ldc1 $f1, 8([[R3]])
 
 ; N64-DAG: lui [[R1:\$[0-9]+]], %highest(struct_fp128)
-; N64-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N64-DAG: dmtc1 [[R2]], $f0
-; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R1]])
-; N64-DAG: dmtc1 [[R4]], $f1
+; N64-DAG: ldc1 $f0, %lo(struct_fp128)([[R1]])
+; N64-DAG: ldc1 $f1, 8([[R1]])
Index: test/CodeGen/Mips/msa/bitcast.ll
===================================================================
--- test/CodeGen/Mips/msa/bitcast.ll
+++ test/CodeGen/Mips/msa/bitcast.ll
@@ -362,14 +362,13 @@
 }
 
 ; LITENDIAN: v8f16_to_v16i8:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.b [[R3]],
 ; LITENDIAN: .size v8f16_to_v16i8
 
 ; BIGENDIAN: v8f16_to_v16i8:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.b [[R4]],
 ; BIGENDIAN: .size v8f16_to_v16i8
@@ -431,14 +430,13 @@
 }
 
 ; LITENDIAN: v8f16_to_v4i32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.w [[R2]],
 ; LITENDIAN: .size v8f16_to_v4i32
 
 ; BIGENDIAN: v8f16_to_v4i32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.w [[R3]],
 ; BIGENDIAN: .size v8f16_to_v4i32
@@ -455,14 +453,13 @@
 }
 
 ; LITENDIAN: v8f16_to_v4f32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.w [[R2]],
 ; LITENDIAN: .size v8f16_to_v4f32
 
 ; BIGENDIAN: v8f16_to_v4f32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.w [[R3]],
 ; BIGENDIAN: .size v8f16_to_v4f32
@@ -479,14 +476,13 @@
 }
 
 ; LITENDIAN: v8f16_to_v2i64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.d [[R2]],
 ; LITENDIAN: .size v8f16_to_v2i64
 
 ; BIGENDIAN: v8f16_to_v2i64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.d [[R3]],
 ; BIGENDIAN: .size v8f16_to_v2i64
@@ -503,14 +499,13 @@
 }
 
 ; LITENDIAN: v8f16_to_v2f64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.d [[R2]],
 ; LITENDIAN: .size v8f16_to_v2f64
 
 ; BIGENDIAN: v8f16_to_v2f64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.d [[R3]],
 ; BIGENDIAN: .size v8f16_to_v2f64