Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -2091,10 +2091,10 @@
   def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
                             "lxv $XT, $src", IIC_LdStLFD, []>;
   // Load DWord
-  def LXSD  : DSForm_1<57, 2, (outs vrrc:$vD), (ins memrix:$src),
+  def LXSD  : DSForm_1<57, 2, (outs vsfrc:$vD), (ins memrix:$src),
                        "lxsd $vD, $src", IIC_LdStLFD, []>;
   // Load SP from src, convert it to DP, and place in dword[0]
-  def LXSSP : DSForm_1<57, 3, (outs vrrc:$vD), (ins memrix:$src),
+  def LXSSP : DSForm_1<57, 3, (outs vssrc:$vD), (ins memrix:$src),
                        "lxssp $vD, $src", IIC_LdStLFD, []>;
 
   // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different
@@ -2128,10 +2128,10 @@
   def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
                              "stxv $XT, $dst", IIC_LdStSTFD, []>;
   // Store DWord
-  def STXSD  : DSForm_1<61, 2, (outs), (ins vrrc:$vS, memrix:$dst),
+  def STXSD  : DSForm_1<61, 2, (outs), (ins vsfrc:$vS, memrix:$dst),
                         "stxsd $vS, $dst", IIC_LdStSTFD, []>;
   // Convert DP of dword[0] to SP, and Store to dst
-  def STXSSP : DSForm_1<61, 3, (outs), (ins vrrc:$vS, memrix:$dst),
+  def STXSSP : DSForm_1<61, 3, (outs), (ins vssrc:$vS, memrix:$dst),
                         "stxssp $vS, $dst", IIC_LdStSTFD, []>;
 
   // [PO S RA RB XO SX]
@@ -2155,4 +2155,18 @@
   def STXVL  : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>;
   def STXVLL : X_XS6_RA5_RB5<31, 429, "stxvll" , vsrc, []>;
   } // end mayStore
+
+  // Prefer Power9 (aka Power v.3 instructions)
+  // These are DS-form (memrix) instructions, so the displacement must be a
+  // multiple of 4: match with ixaddr rather than iaddr.
+  // NOTE(review): lxsd/lxssp/stxsd/stxssp appear to reach only VSR32-63, while
+  // vsfrc/vssrc presumably also contain VSR0-31 -- confirm the register classes.
+  let AddedComplexity = 500 in {
+    def : Pat<(f64 (load ixaddr:$src)), (LXSD ixaddr:$src)>;
+    def : Pat<(f32 (load ixaddr:$src)), (LXSSP ixaddr:$src)>;
+    def : Pat<(f64 (extloadf32 ixaddr:$src)),
+              (COPY_TO_REGCLASS (LXSSP ixaddr:$src), VSFRC)>;
+    def : Pat<(store f64:$vS, ixaddr:$dst), (STXSD $vS, ixaddr:$dst)>;
+    def : Pat<(store f32:$vS, ixaddr:$dst), (STXSSP $vS, ixaddr:$dst)>;
+  }
 } // end HasP9Vector
Index: 
test/CodeGen/PowerPC/dform-test.ll
===================================================================
--- test/CodeGen/PowerPC/dform-test.ll
+++ test/CodeGen/PowerPC/dform-test.ll
@@ -0,0 +1,188 @@
+; RUN: llc < %s -march=ppc64 -mcpu=pwr9 -o - | FileCheck %s --check-prefix=PWR9 --check-prefix=CHECK
+; RUN: llc < %s -march=ppc64 -mcpu=pwr8 -o - | FileCheck %s --check-prefix=PWR8 --check-prefix=CHECK
+
+; Verify that the D-form scalar VSX loads and stores (lxsd, lxssp, stxsd,
+; stxssp) are emitted for pwr9 and are not emitted for pwr8.
+; The {{[^x]}} suffix keeps the X-form mnemonics (e.g. lxsdx) from matching.
+
+; CHECK-LABEL: LXSD:
+define void @LXSD(i32 zeroext %N) {
+entry:
+  %cmp17 = icmp eq i32 %N, 0
+  br i1 %cmp17, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+; PWR9: lxsd{{[^x]}}
+; PWR9: lxsd{{[^x]}}
+; PWR9: lxsd{{[^x]}}
+; PWR9: lxsd{{[^x]}}
+; PWR8-NOT: lxsd{{[^x]}}
+  %i.018 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %call = tail call double* @getDoublePtr()
+  %0 = load double, double* %call, align 8
+  %vecinit = insertelement <2 x double> <double 0.000000e+00, double undef>, double %0, i32 1
+  %arrayidx1 = getelementptr inbounds double, double* %call, i64 1
+  %1 = load double, double* %arrayidx1, align 8
+  %vecinit2 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %1, i32 1
+  %arrayidx3 = getelementptr inbounds double, double* %call, i64 2
+  %2 = load double, double* %arrayidx3, align 8
+  %vecinit4 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %2, i32 1
+  %arrayidx5 = getelementptr inbounds double, double* %call, i64 3
+  %3 = load double, double* %arrayidx5, align 8
+  %vecinit6 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %3, i32 1
+  tail call void @passVSX(<2 x double> %vecinit, <2 x double> %vecinit2, <2 x double> %vecinit4, <2 x double> %vecinit6)
+  %inc = add nuw nsw i32 %i.018, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare double* @getDoublePtr()
+
+; CHECK-LABEL: LXSSP:
+define void @LXSSP(i32 zeroext %N) {
+entry:
+  %cmp20 = icmp eq i32 %N, 0
+  br i1 %cmp20, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+; PWR9: lxssp{{[^x]}}
+; PWR9: lxssp{{[^x]}}
+; PWR9: lxssp{{[^x]}}
+; PWR9: lxssp{{[^x]}}
+; PWR8-NOT: lxssp{{[^x]}}
+  %i.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %call = tail call float* @getFloatPtr()
+  %0 = load float, float* %call, align 4
+  %conv = fpext float %0 to double
+  %vecinit = insertelement <2 x double> <double 0.000000e+00, double undef>, double %conv, i32 1
+  %arrayidx1 = getelementptr inbounds float, float* %call, i64 1
+  %1 = load float, float* %arrayidx1, align 4
+  %conv2 = fpext float %1 to double
+  %vecinit3 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %conv2, i32 1
+  %arrayidx4 = getelementptr inbounds float, float* %call, i64 2
+  %2 = load float, float* %arrayidx4, align 4
+  %conv5 = fpext float %2 to double
+  %vecinit6 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %conv5, i32 1
+  %arrayidx7 = getelementptr inbounds float, float* %call, i64 3
+  %3 = load float, float* %arrayidx7, align 4
+  %conv8 = fpext float %3 to double
+  %vecinit9 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %conv8, i32 1
+  tail call void @passVSX(<2 x double> %vecinit, <2 x double> %vecinit3, <2 x double> %vecinit6, <2 x double> %vecinit9)
+  %inc = add nuw nsw i32 %i.021, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare float* @getFloatPtr()
+
+; CHECK-LABEL: STXSD:
+define void @STXSD(i32 zeroext %N) {
+entry:
+  %cmp17 = icmp eq i32 %N, 0
+  br i1 %cmp17, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+; PWR9: stxsd{{[^x]}}
+; PWR9: stxsd{{[^x]}}
+; PWR9: stxsd{{[^x]}}
+; PWR9: stxsd{{[^x]}}
+; PWR8-NOT: stxsd{{[^x]}}
+  %i.018 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %call = tail call double* @getDoublePtr()
+  %call1 = tail call <2 x double> @getVSX()
+  %vecext = extractelement <2 x double> %call1, i32 0
+  store double %vecext, double* %call, align 8
+  %call2 = tail call <2 x double> @getVSX()
+  %vecext3 = extractelement <2 x double> %call2, i32 0
+  %arrayidx4 = getelementptr inbounds double, double* %call, i64 1
+  store double %vecext3, double* %arrayidx4, align 8
+  %call5 = tail call <2 x double> @getVSX()
+  %vecext6 = extractelement <2 x double> %call5, i32 0
+  %arrayidx7 = getelementptr inbounds double, double* %call, i64 2
+  store double %vecext6, double* %arrayidx7, align 8
+  %call8 = tail call <2 x double> @getVSX()
+  %vecext9 = extractelement <2 x double> %call8, i32 0
+  %arrayidx10 = getelementptr inbounds double, double* %call, i64 3
+  store double %vecext9, double* %arrayidx10, align 8
+  %inc = add nuw nsw i32 %i.018, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare <2 x double> @getVSX()
+
+; CHECK-LABEL: STXSSP:
+define void @STXSSP(i32 zeroext %N) {
+entry:
+  %cmp20 = icmp eq i32 %N, 0
+  br i1 %cmp20, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+; PWR9: stxssp{{[^x]}}
+; PWR9: stxssp{{[^x]}}
+; PWR9: stxssp{{[^x]}}
+; PWR9: stxssp{{[^x]}}
+; PWR8-NOT: stxssp{{[^x]}}
+  %i.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %call = tail call float* @getFloatPtr()
+  %call1 = tail call <2 x double> @getVSX()
+  %vecext = extractelement <2 x double> %call1, i32 0
+  %conv = fptrunc double %vecext to float
+  store float %conv, float* %call, align 4
+  %call2 = tail call <2 x double> @getVSX()
+  %vecext3 = extractelement <2 x double> %call2, i32 0
+  %conv4 = fptrunc double %vecext3 to float
+  %arrayidx5 = getelementptr inbounds float, float* %call, i64 1
+  store float %conv4, float* %arrayidx5, align 4
+  %call6 = tail call <2 x double> @getVSX()
+  %vecext7 = extractelement <2 x double> %call6, i32 0
+  %conv8 = fptrunc double %vecext7 to float
+  %arrayidx9 = getelementptr inbounds float, float* %call, i64 2
+  store float %conv8, float* %arrayidx9, align 4
+  %call10 = tail call <2 x double> @getVSX()
+  %vecext11 = extractelement <2 x double> %call10, i32 0
+  %conv12 = fptrunc double %vecext11 to float
+  %arrayidx13 = getelementptr inbounds float, float* %call, i64 3
+  store float %conv12, float* %arrayidx13, align 4
+  %inc = add nuw nsw i32 %i.021, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare void @passVSX(<2 x double>, <2 x double>, <2 x double>, <2 x double>)