diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1202,6 +1202,11 @@
/// the store instruction as otherwise there is no way to signal whether it was
/// combined or not: IC.EraseInstFromFunction returns a null pointer.
static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
+  // Stores through opaque pointers have no pointee type to match, so there
+  // is no need to rewrite the store to the type of the value being stored.
+  // Any bitcasts present serve some other purpose.
+  if (SI.getPointerOperandType()->isOpaquePointerTy())
+    return false;
// FIXME: We could probably with some care handle both volatile and ordered
// atomic stores here but it isn't clear that this is important.
if (!SI.isUnordered())
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -958,7 +958,8 @@
; GCN-PRELINK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[TMP0]], -2147483648
; GCN-PRELINK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
; GCN-PRELINK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
-; GCN-PRELINK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[A]], align 4
+; GCN-PRELINK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
+; GCN-PRELINK-NEXT: store float [[TMP3]], ptr addrspace(1) [[A]], align 4
; GCN-PRELINK-NEXT: ret void
;
; GCN-NATIVE-LABEL: define amdgpu_kernel void @test_pow
@@ -973,7 +974,8 @@
; GCN-NATIVE-NEXT: [[__POW_SIGN:%.*]] = and i32 [[TMP0]], -2147483648
; GCN-NATIVE-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
; GCN-NATIVE-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
-; GCN-NATIVE-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[A]], align 4
+; GCN-NATIVE-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
+; GCN-NATIVE-NEXT: store float [[TMP3]], ptr addrspace(1) [[A]], align 4
; GCN-NATIVE-NEXT: ret void
;
entry:
@@ -1056,7 +1058,8 @@
; GCN-PRELINK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; GCN-PRELINK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
; GCN-PRELINK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
-; GCN-PRELINK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[A]], align 4
+; GCN-PRELINK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
+; GCN-PRELINK-NEXT: store float [[TMP3]], ptr addrspace(1) [[A]], align 4
; GCN-PRELINK-NEXT: ret void
;
; GCN-NATIVE-LABEL: define amdgpu_kernel void @test_pown
@@ -1076,7 +1079,8 @@
; GCN-NATIVE-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; GCN-NATIVE-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
; GCN-NATIVE-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
-; GCN-NATIVE-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[A]], align 4
+; GCN-NATIVE-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
+; GCN-NATIVE-NEXT: store float [[TMP3]], ptr addrspace(1) [[A]], align 4
; GCN-NATIVE-NEXT: ret void
;
entry:
diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/store-new-type.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/store-new-type.ll
--- a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/store-new-type.ll
+++ b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/store-new-type.ll
@@ -9,8 +9,9 @@
; CHECK-LABEL: define <2 x i4>
@shuf_bitcast_insert_use2 ; CHECK-SAME: (<2 x i8> [[V:%.*]], i8 [[X:%.*]], ptr [[P:%.*]]) { ; CHECK-NEXT: [[I:%.*]] = insertelement <2 x i8> [[V]], i8 [[X]], i64 0 -; CHECK-NEXT: store <2 x i8> [[I]], ptr [[P]], align 2, !DIAssignID [[DIASSIGNID6:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata <2 x i8> [[I]], metadata [[META7:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID6]], metadata ptr [[P]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] +; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i8> [[I]] to <4 x i4> +; CHECK-NEXT: store <4 x i4> [[B]], ptr [[P]], align 2, !DIAssignID [[DIASSIGNID6:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.assign(metadata <4 x i4> [[B]], metadata [[META7:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID6]], metadata ptr [[P]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] ; CHECK-NEXT: [[R:%.*]] = bitcast i8 [[X]] to <2 x i4> ; CHECK-NEXT: ret <2 x i4> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll b/llvm/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll --- a/llvm/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll +++ b/llvm/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll @@ -3,7 +3,8 @@ define void @foo(<16 x i8> %a, <16 x i8> %b, ptr %c) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: store <16 x i8> [[B:%.*]], ptr [[C:%.*]], align 4 +; CHECK-NEXT: [[BB:%.*]] = bitcast <16 x i8> [[B:%.*]] to <4 x i32> +; CHECK-NEXT: store <4 x i32> [[BB]], ptr [[C:%.*]], align 4 ; CHECK-NEXT: ret void ; %aa = bitcast <16 x i8> %a to <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/bitcast-phi-uselistorder.ll b/llvm/test/Transforms/InstCombine/bitcast-phi-uselistorder.ll --- a/llvm/test/Transforms/InstCombine/bitcast-phi-uselistorder.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-phi-uselistorder.ll @@ -12,7 +12,8 @@ ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[TMP0:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[LOAD1]], [[IF]] ] -; CHECK-NEXT: store double [[TMP0]], ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[TMP0]] to i64 +; CHECK-NEXT: store i64 [[TMP1]], ptr [[P:%.*]], align 8 ; CHECK-NEXT: ret double [[TMP0]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/cast_phi.ll b/llvm/test/Transforms/InstCombine/cast_phi.ll --- a/llvm/test/Transforms/InstCombine/cast_phi.ll +++ b/llvm/test/Transforms/InstCombine/cast_phi.ll @@ -8,11 +8,12 @@ ; CHECK-NEXT: [[CALLA:%.*]] = alloca [258 x float], align 4 ; CHECK-NEXT: [[CALLB:%.*]] = alloca [258 x float], align 4 ; CHECK-NEXT: [[CONV_I:%.*]] = uitofp i32 [[INUMSTEPS:%.*]] to float +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[CONV_I]] to i32 ; CHECK-NEXT: [[CONV_I12:%.*]] = zext i32 [[TID:%.*]] to i64 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [258 x float], ptr [[CALLA]], i64 0, i64 [[CONV_I12]] -; CHECK-NEXT: store float [[CONV_I]], ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX3]], align 4 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [258 x float], ptr [[CALLB]], i64 0, i64 [[CONV_I12]] -; CHECK-NEXT: store float [[CONV_I]], ptr [[ARRAYIDX6]], align 4 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX6]], align 4 ; CHECK-NEXT: [[CMP7:%.*]] = icmp eq i32 [[TID]], 0 ; CHECK-NEXT: br i1 [[CMP7]], label [[DOTBB1:%.*]], label [[DOTBB2:%.*]] ; CHECK: .bb1: @@ -25,49 +26,53 @@ ; CHECK-NEXT: [[CMP135:%.*]] = icmp sgt i32 [[INUMSTEPS]], 0 ; CHECK-NEXT: br i1 [[CMP135]], label [[DOTBB3:%.*]], label [[DOTBB8:%.*]] ; CHECK: .bb3: -; CHECK-NEXT: 
[[TMP1:%.*]] = phi float [ [[TMP10:%.*]], [[DOTBB12:%.*]] ], [ [[CONV_I]], [[DOTBB2]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi float [ [[TMP11:%.*]], [[DOTBB12]] ], [ [[CONV_I]], [[DOTBB2]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi float [ [[TMP13:%.*]], [[DOTBB12:%.*]] ], [ [[CONV_I]], [[DOTBB2]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi float [ [[TMP14:%.*]], [[DOTBB12]] ], [ [[CONV_I]], [[DOTBB2]] ] ; CHECK-NEXT: [[I12_06:%.*]] = phi i32 [ [[SUB:%.*]], [[DOTBB12]] ], [ [[INUMSTEPS]], [[DOTBB2]] ] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[I12_06]], [[BASE:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[I12_06]], [[BASE:%.*]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[I12_06]], 1 ; CHECK-NEXT: [[CONV_I9:%.*]] = sext i32 [[ADD]] to i64 ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [258 x float], ptr [[CALLA]], i64 0, i64 [[CONV_I9]] ; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [258 x float], ptr [[CALLB]], i64 0, i64 [[CONV_I9]] ; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[I12_06]], [[BASE]] -; CHECK-NEXT: br i1 [[TMP3]], label [[DOTBB4:%.*]], label [[DOTBB5:%.*]] +; CHECK-NEXT: br i1 [[TMP4]], label [[DOTBB4:%.*]], label [[DOTBB5:%.*]] ; CHECK: .bb4: -; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX20]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX24]], align 4 -; CHECK-NEXT: [[ADD33:%.*]] = fadd float [[TMP5]], [[TMP4]] -; CHECK-NEXT: [[ADD33_1:%.*]] = fadd float [[ADD33]], [[TMP1]] -; CHECK-NEXT: [[ADD33_2:%.*]] = fadd float [[ADD33_1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX20]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX24]], align 4 +; CHECK-NEXT: [[ADD33:%.*]] = fadd float [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[ADD33_1:%.*]] = fadd float [[ADD33]], [[TMP2]] +; CHECK-NEXT: [[ADD33_2:%.*]] = fadd float [[ADD33_1]], [[TMP3]] ; CHECK-NEXT: br label [[DOTBB5]] ; CHECK: .bb5: -; CHECK-NEXT: [[TMP6:%.*]] = phi float [ [[ADD33_1]], [[DOTBB4]] ], [ [[TMP1]], [[DOTBB3]] ] -; CHECK-NEXT: [[TMP7:%.*]] = phi float [ [[ADD33_2]], [[DOTBB4]] ], [ [[TMP2]], [[DOTBB3]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi float [ [[ADD33_1]], [[DOTBB4]] ], [ [[TMP2]], [[DOTBB3]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi float [ [[ADD33_2]], [[DOTBB4]] ], [ [[TMP3]], [[DOTBB3]] ] ; CHECK-NEXT: br i1 [[CMP40]], label [[DOTBB6:%.*]], label [[DOTBB7:%.*]] ; CHECK: .bb6: -; CHECK-NEXT: store float [[TMP7]], ptr [[ARRAYIDX3]], align 4 -; CHECK-NEXT: store float [[TMP6]], ptr [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast float [[TMP8]] to i32 +; CHECK-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast float [[TMP7]] to i32 +; CHECK-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX6]], align 4 ; CHECK-NEXT: br label [[DOTBB7]] ; CHECK: .bb7: -; CHECK-NEXT: br i1 [[TMP3]], label [[DOTBB9:%.*]], label [[DOTBB10:%.*]] +; CHECK-NEXT: br i1 [[TMP4]], label [[DOTBB9:%.*]], label [[DOTBB10:%.*]] ; CHECK: .bb8: ; CHECK-NEXT: ret void ; CHECK: .bb9: -; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX20]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX24]], align 4 -; CHECK-NEXT: [[ADD33_112:%.*]] = fadd float [[TMP9]], [[TMP8]] -; CHECK-NEXT: [[ADD33_1_1:%.*]] = fadd float [[ADD33_112]], [[TMP6]] -; CHECK-NEXT: [[ADD33_2_1:%.*]] = fadd float [[ADD33_1_1]], [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX20]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX24]], align 4 +; CHECK-NEXT: [[ADD33_112:%.*]] = fadd float [[TMP12]], [[TMP11]] +; CHECK-NEXT: 
[[ADD33_1_1:%.*]] = fadd float [[ADD33_112]], [[TMP7]] +; CHECK-NEXT: [[ADD33_2_1:%.*]] = fadd float [[ADD33_1_1]], [[TMP8]] ; CHECK-NEXT: br label [[DOTBB10]] ; CHECK: .bb10: -; CHECK-NEXT: [[TMP10]] = phi float [ [[ADD33_1_1]], [[DOTBB9]] ], [ [[TMP6]], [[DOTBB7]] ] -; CHECK-NEXT: [[TMP11]] = phi float [ [[ADD33_2_1]], [[DOTBB9]] ], [ [[TMP7]], [[DOTBB7]] ] +; CHECK-NEXT: [[TMP13]] = phi float [ [[ADD33_1_1]], [[DOTBB9]] ], [ [[TMP7]], [[DOTBB7]] ] +; CHECK-NEXT: [[TMP14]] = phi float [ [[ADD33_2_1]], [[DOTBB9]] ], [ [[TMP8]], [[DOTBB7]] ] ; CHECK-NEXT: br i1 [[CMP40]], label [[DOTBB11:%.*]], label [[DOTBB12]] ; CHECK: .bb11: -; CHECK-NEXT: store float [[TMP11]], ptr [[ARRAYIDX3]], align 4 -; CHECK-NEXT: store float [[TMP10]], ptr [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float [[TMP14]] to i32 +; CHECK-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP13]] to i32 +; CHECK-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX6]], align 4 ; CHECK-NEXT: br label [[DOTBB12]] ; CHECK: .bb12: ; CHECK-NEXT: [[SUB]] = add i32 [[I12_06]], -4 diff --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll --- a/llvm/test/Transforms/InstCombine/icmp-vec.ll +++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll @@ -586,7 +586,7 @@ ; CHECK-LABEL: @eq_cast_eq-1_use2( ; CHECK-NEXT: [[IC:%.*]] = icmp sgt <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i1> [[IC]] to i2 -; CHECK-NEXT: store <2 x i1> [[IC]], ptr [[P:%.*]], align 1 +; CHECK-NEXT: store i2 [[B]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: [[R:%.*]] = icmp eq i2 [[B]], -1 ; CHECK-NEXT: ret i1 [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/insert-val-extract-elem.ll b/llvm/test/Transforms/InstCombine/insert-val-extract-elem.ll --- a/llvm/test/Transforms/InstCombine/insert-val-extract-elem.ll +++ b/llvm/test/Transforms/InstCombine/insert-val-extract-elem.ll @@ -6,7 +6,11 @@ ; CHECK-SAME: (ptr sret([2 x double]) [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; CHECK-NEXT: top: ; CHECK-NEXT: [[X:%.*]] = load <2 x double>, ptr [[TMP1]], align 16 -; CHECK-NEXT: store <2 x double> [[X]], ptr [[TMP0]], align 8 +; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x double> [[X]], i64 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x double> [[X]], i64 1 +; CHECK-NEXT: store double [[X0]], ptr [[TMP0]], align 8 +; CHECK-NEXT: [[DOTREPACK1:%.*]] = getelementptr inbounds [2 x double], ptr [[TMP0]], i64 0, i64 1 +; CHECK-NEXT: store double [[X1]], ptr [[DOTREPACK1]], align 8 ; CHECK-NEXT: ret void ; top: @@ -25,7 +29,11 @@ ; CHECK-SAME: (ptr sret([2 x i64]) [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; CHECK-NEXT: top: ; CHECK-NEXT: [[X:%.*]] = load <2 x i64>, ptr [[TMP1]], align 16 -; CHECK-NEXT: store <2 x i64> [[X]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i64> [[X]], i64 1 +; CHECK-NEXT: [[X2:%.*]] = extractelement <2 x i64> [[X]], i64 0 +; CHECK-NEXT: store i64 [[X2]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[DOTREPACK1:%.*]] = getelementptr inbounds [2 x i64], ptr [[TMP0]], i64 0, i64 1 +; CHECK-NEXT: store i64 [[X1]], ptr [[DOTREPACK1]], align 4 ; CHECK-NEXT: ret void ; top: @@ -45,7 +53,17 @@ ; CHECK-SAME: (ptr sret([4 x float]) [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; CHECK-NEXT: top: ; CHECK-NEXT: [[X:%.*]] = load <4 x float>, ptr [[TMP1]], align 16 -; CHECK-NEXT: store <4 x float> [[X]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X]], i64 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> 
[[X]], i64 1 +; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[X]], i64 2 +; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X]], i64 3 +; CHECK-NEXT: store float [[X0]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[DOTREPACK1:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 1 +; CHECK-NEXT: store float [[X1]], ptr [[DOTREPACK1]], align 4 +; CHECK-NEXT: [[DOTREPACK3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 2 +; CHECK-NEXT: store float [[X2]], ptr [[DOTREPACK3]], align 4 +; CHECK-NEXT: [[DOTREPACK5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 3 +; CHECK-NEXT: store float [[X3]], ptr [[DOTREPACK5]], align 4 ; CHECK-NEXT: ret void ; top: @@ -69,7 +87,17 @@ ; CHECK-SAME: (ptr sret([[PSEUDOVEC:%.*]]) [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; CHECK-NEXT: top: ; CHECK-NEXT: [[X:%.*]] = load <4 x float>, ptr [[TMP1]], align 16 -; CHECK-NEXT: store <4 x float> [[X]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X]], i64 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i64 1 +; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[X]], i64 2 +; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X]], i64 3 +; CHECK-NEXT: store float [[X0]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[DOTREPACK1:%.*]] = getelementptr inbounds [[PSEUDOVEC]], ptr [[TMP0]], i64 0, i32 1 +; CHECK-NEXT: store float [[X1]], ptr [[DOTREPACK1]], align 4 +; CHECK-NEXT: [[DOTREPACK3:%.*]] = getelementptr inbounds [[PSEUDOVEC]], ptr [[TMP0]], i64 0, i32 2 +; CHECK-NEXT: store float [[X2]], ptr [[DOTREPACK3]], align 4 +; CHECK-NEXT: [[DOTREPACK5:%.*]] = getelementptr inbounds [[PSEUDOVEC]], ptr [[TMP0]], i64 0, i32 3 +; CHECK-NEXT: store float [[X3]], ptr [[DOTREPACK5]], align 4 ; CHECK-NEXT: ret void ; top: diff --git a/llvm/test/Transforms/InstCombine/load-bitcast-select.ll b/llvm/test/Transforms/InstCombine/load-bitcast-select.ll --- a/llvm/test/Transforms/InstCombine/load-bitcast-select.ll +++ b/llvm/test/Transforms/InstCombine/load-bitcast-select.ll @@ -22,7 +22,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt float [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[DOTV:%.*]] = select i1 [[CMP_I]], float [[TMP2]], float [[TMP1]] -; CHECK-NEXT: store float [[DOTV]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[DOTV]] to i32 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; @@ -81,7 +82,8 @@ ; CHECK-NEXT: [[LD2:%.*]] = load float, ptr [[LOADADDR2:%.*]], align 4 ; CHECK-NEXT: [[COND:%.*]] = fcmp ogt float [[LD1]], [[LD2]] ; CHECK-NEXT: [[LD_V:%.*]] = select i1 [[COND]], float [[LD1]], float [[LD2]] -; CHECK-NEXT: store float [[LD_V]], ptr [[STOREADDR:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = bitcast float [[LD_V]] to i32 +; CHECK-NEXT: store i32 [[LD]], ptr [[STOREADDR:%.*]], align 4 ; CHECK-NEXT: ret void ; %ld1 = load float, ptr %loadaddr1, align 4 diff --git a/llvm/test/Transforms/InstCombine/load.ll b/llvm/test/Transforms/InstCombine/load.ll --- a/llvm/test/Transforms/InstCombine/load.ll +++ b/llvm/test/Transforms/InstCombine/load.ll @@ -215,7 +215,8 @@ ; CHECK-NEXT: store float [[X1]], ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[X2:%.*]] = load float, ptr [[X]], align 4 ; CHECK-NEXT: store float [[X2]], ptr [[B]], align 4 -; CHECK-NEXT: store float [[X2]], ptr [[C:%.*]], align 4 +; CHECK-NEXT: [[X2_CAST:%.*]] = bitcast float [[X2]] 
to i32
+; CHECK-NEXT: store i32 [[X2_CAST]], ptr [[C:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -240,7 +241,8 @@
; CHECK-NEXT: store <4 x i8> [[X1]], ptr [[B:%.*]], align 4
; CHECK-NEXT: [[X2:%.*]] = load <4 x i8>, ptr [[X]], align 4
; CHECK-NEXT: store <4 x i8> [[X2]], ptr [[B]], align 4
-; CHECK-NEXT: store <4 x i8> [[X2]], ptr [[C:%.*]], align 4
+; CHECK-NEXT: [[X2_CAST:%.*]] = bitcast <4 x i8> [[X2]] to i32
+; CHECK-NEXT: store i32 [[X2_CAST]], ptr [[C:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll b/llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll
--- a/llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll
+++ b/llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll
@@ -9,8 +9,9 @@
; CHECK-NEXT: [[LD2:%.*]] = load double, ptr [[Z1]], align 8
; CHECK-NEXT: [[TMPVAR10:%.*]] = fcmp olt double [[LD1]], [[LD2]]
; CHECK-NEXT: [[TMPVAR12_V:%.*]] = select i1 [[TMPVAR10]], double [[LD1]], double [[LD2]]
-; CHECK-NEXT: store double [[TMPVAR12_V]], ptr [[ST1:%.*]], align 8
-; CHECK-NEXT: store double [[TMPVAR12_V]], ptr [[ST2:%.*]], align 8
+; CHECK-NEXT: [[TMPVAR12:%.*]] = bitcast double [[TMPVAR12_V]] to i64
+; CHECK-NEXT: store i64 [[TMPVAR12]], ptr [[ST1:%.*]], align 8
+; CHECK-NEXT: store i64 [[TMPVAR12]], ptr [[ST2:%.*]], align 8
; CHECK-NEXT: ret void
;
%y1 = alloca double
diff --git a/llvm/test/Transforms/InstCombine/pr25342.ll b/llvm/test/Transforms/InstCombine/pr25342.ll
--- a/llvm/test/Transforms/InstCombine/pr25342.ll
+++ b/llvm/test/Transforms/InstCombine/pr25342.ll
@@ -31,8 +31,10 @@
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end:
-; CHECK-NEXT: store float [[TMP0]], ptr @dd, align 4
-; CHECK-NEXT: store float [[TMP1]], ptr getelementptr inbounds (%"struct.std::complex", ptr @dd, i64 0, i32 0, i32 1), align 4
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast float [[TMP0]] to i32
+; CHECK-NEXT: store i32 [[TMP6]], ptr @dd, align 4
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast float [[TMP1]] to i32
+; CHECK-NEXT: store i32 [[TMP7]], ptr getelementptr inbounds (%"struct.std::complex", ptr @dd, i64 0, i32 0, i32 1), align 4
; CHECK-NEXT: ret void
;
entry:
@@ -102,7 +104,8 @@
; CHECK-NEXT: [[TMP6]] = phi float [ [[ADD_I]], [[FOR_BODY]] ], [ [[TMP5]], [[EVEN_BB]] ]
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end:
-; CHECK-NEXT: store float [[TMP0]], ptr @dd, align 4
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast float [[TMP0]] to i32
+; CHECK-NEXT: store i32 [[TMP7]], ptr @dd, align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -752,7 +752,8 @@
; CHECK-LABEL: @select_oneuse_bitcast(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[C:%.*]], [[D:%.*]]
; CHECK-NEXT: [[SEL1_V:%.*]] = select <vscale x 4 x i1> [[CMP]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]
-; CHECK-NEXT: store <vscale x 4 x float> [[SEL1_V]], ptr [[PTR1:%.*]], align 16
+; CHECK-NEXT: [[SEL1:%.*]] = bitcast <vscale x 4 x float> [[SEL1_V]] to <vscale x 4 x i32>
+; CHECK-NEXT: store <vscale x 4 x i32> [[SEL1]], ptr [[PTR1:%.*]], align 16
; CHECK-NEXT: ret void
;
%cmp = icmp ult <vscale x 4 x i32> %c, %d
@@ -770,9 +771,11 @@
; CHECK-LABEL: @min_max_bitcast(
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[A]], <4 x float> [[B]]
+; CHECK-NEXT: [[SEL1:%.*]] = bitcast <4 x float> [[SEL1_V]] to <4 x i32>
; CHECK-NEXT: [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[B]], <4 x float> [[A]]
-; CHECK-NEXT: store <4 x float> [[SEL1_V]], ptr [[PTR1:%.*]], align 16
-; CHECK-NEXT: store <4 x float> [[SEL2_V]], ptr [[PTR2:%.*]], align 16
+; CHECK-NEXT: [[SEL2:%.*]] = bitcast <4 x float> [[SEL2_V]] to <4 x i32>
+; CHECK-NEXT: store <4 x i32> [[SEL1]], ptr [[PTR1:%.*]], align 16
+; CHECK-NEXT: store <4 x i32> [[SEL2]], ptr [[PTR2:%.*]], align 16
; CHECK-NEXT: ret void
;
%cmp = fcmp olt <4 x float> %a, %b
@@ -789,9 +792,11 @@
; CHECK-LABEL: @min_max_bitcast1(
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[SEL1_V:%.*]] = select <vscale x 4 x i1> [[CMP]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]
+; CHECK-NEXT: [[SEL1:%.*]] = bitcast <vscale x 4 x float> [[SEL1_V]] to <vscale x 4 x i32>
; CHECK-NEXT: [[SEL2_V:%.*]] = select <vscale x 4 x i1> [[CMP]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[A]]
-; CHECK-NEXT: store <vscale x 4 x float> [[SEL1_V]], ptr [[PTR1:%.*]], align 16
-; CHECK-NEXT: store <vscale x 4 x float> [[SEL2_V]], ptr [[PTR2:%.*]], align 16
+; CHECK-NEXT: [[SEL2:%.*]] = bitcast <vscale x 4 x float> [[SEL2_V]] to <vscale x 4 x i32>
+; CHECK-NEXT: store <vscale x 4 x i32> [[SEL1]], ptr [[PTR1:%.*]], align 16
+; CHECK-NEXT: store <vscale x 4 x i32> [[SEL2]], ptr [[PTR2:%.*]], align 16
; CHECK-NEXT: ret void
;
%cmp = fcmp olt <vscale x 4 x float> %a, %b
diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll
--- a/llvm/test/Transforms/InstCombine/select_meta.ll
+++ b/llvm/test/Transforms/InstCombine/select_meta.ll
@@ -32,9 +32,11 @@
; CHECK-LABEL: @min_max_bitcast(
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[A]], <4 x float> [[B]], !prof [[PROF0]]
+; CHECK-NEXT: [[SEL1:%.*]] = bitcast <4 x float> [[SEL1_V]] to <4 x i32>
; CHECK-NEXT: [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[B]], <4 x float> [[A]], !prof [[PROF0]]
-; CHECK-NEXT: store <4 x float> [[SEL1_V]], ptr [[PTR1:%.*]], align 16
-; CHECK-NEXT: store <4 x float> [[SEL2_V]], ptr [[PTR2:%.*]], align 16
+; CHECK-NEXT: [[SEL2:%.*]] = bitcast <4 x float> [[SEL2_V]] to <4 x i32>
+; CHECK-NEXT: store <4 x i32> [[SEL1]], ptr [[PTR1:%.*]], align 16
+; CHECK-NEXT: store <4 x i32> [[SEL2]], ptr [[PTR2:%.*]], align 16
; CHECK-NEXT: ret void
;
%cmp = fcmp olt <4 x float> %a, %b
diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
--- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
+++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
@@ -196,7 +196,8 @@
define <2 x i4> @shuf_bitcast_insert_use2(<2 x i8> %v, i8 %x, ptr %p) {
; CHECK-LABEL: @shuf_bitcast_insert_use2(
; CHECK-NEXT: [[I:%.*]] = insertelement <2 x i8> [[V:%.*]], i8 [[X:%.*]], i64 0
-; CHECK-NEXT: store <2 x i8> [[I]], ptr [[P:%.*]], align 2
+; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i8> [[I]] to <4 x i4>
+; CHECK-NEXT: store <4 x i4> [[B]], ptr [[P:%.*]], align 2
; CHECK-NEXT: [[R:%.*]] = bitcast i8 [[X]] to <2 x i4>
; CHECK-NEXT: ret <2 x i4> [[R]]
;
diff --git a/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll b/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
--- a/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
+++ b/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
@@ -16,19 +16,13 @@
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[F]], i64 64
; CHECK-NEXT: [[F_VAL:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT: [[VAL1_CAST_I:%.*]] = bitcast half [[VAL1]] to i16
; CHECK-NEXT: [[CMP_NOT_NOT_I:%.*]] = icmp eq i32 [[VAL3]], 0
-; CHECK-NEXT: br i1 [[CMP_NOT_NOT_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]]
-; CHECK:
if.then.i: -; CHECK-NEXT: store half [[VAL1]], ptr [[F_VAL]], align 2 -; CHECK-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[F_VAL]], i64 16 -; CHECK-NEXT: br label [[BADCHILD_EXIT:%.*]] -; CHECK: if.else.i: -; CHECK-NEXT: [[ADD_PTR_I_I_I_I7_I:%.*]] = getelementptr inbounds i8, ptr [[F_VAL]], i64 16 -; CHECK-NEXT: store half [[VAL1]], ptr [[ADD_PTR_I_I_I_I7_I]], align 2 -; CHECK-NEXT: br label [[BADCHILD_EXIT]] -; CHECK: badChild.exit: -; CHECK-NEXT: [[THIS_64_VAL_SINK_I:%.*]] = phi ptr [ [[F_VAL]], [[IF_ELSE_I]] ], [ [[ADD_PTR_I_I_I_I_I]], [[IF_THEN_I]] ] -; CHECK-NEXT: store i16 [[VAL2]], ptr [[THIS_64_VAL_SINK_I]], align 2 +; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[VAL1_CAST_I]], i16 [[VAL2]] +; CHECK-NEXT: [[SPEC_SELECT1_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[VAL2]], i16 [[VAL1_CAST_I]] +; CHECK-NEXT: store i16 [[SPEC_SELECT_I]], ptr [[F_VAL]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[F_VAL]], i64 16 +; CHECK-NEXT: store i16 [[SPEC_SELECT1_I]], ptr [[TMP1]], align 2 ; CHECK-NEXT: ret ptr [[F]] ; entry:
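
Note (illustrative, not part of the patch): a minimal IR sketch of the canonicalization this change disables; the function and value names below are hypothetical. Before this change, running instcombine (e.g. `opt -passes=instcombine -S`) on the pair below folded the bitcast into the store, producing `store float %x, ptr %p`. With the early return added to combineStoreToValueType, the bitcast and the i32 store type are kept as written, which is what the updated CHECK lines above reflect.

define void @store_bitcast_example(float %x, ptr %p) {
  ; Reinterpret the float as i32, then store the integer. InstCombine now
  ; leaves both instructions untouched instead of storing %x as a float.
  %b = bitcast float %x to i32
  store i32 %b, ptr %p, align 4
  ret void
}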