diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -339,6 +339,9 @@
     setOperationAction(ISD::FMA , MVT::f32, Legal);
   }
 
+  if (Subtarget.hasSPE())
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 
   // If we're enabling GP optimizations, use hardware square root
diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
--- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -819,16 +819,6 @@
 
 } // HasSPE
 
-let Predicates = [HasSPE] in {
-def : Pat<(f64 (extloadf32 iaddr:$src)),
-          (COPY_TO_REGCLASS (SPELWZ iaddr:$src), SPERC)>;
-def : Pat<(f64 (extloadf32 xaddr:$src)),
-          (COPY_TO_REGCLASS (SPELWZX xaddr:$src), SPERC)>;
-
-def : Pat<(f64 (fpextend f32:$src)),
-          (COPY_TO_REGCLASS $src, SPERC)>;
-}
-
 let Predicates = [HasSPE] in {
 def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst),
                                              (ins crrc:$cond, spe4rc:$T, spe4rc:$F,
diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll
--- a/llvm/test/CodeGen/PowerPC/spe.ll
+++ b/llvm/test/CodeGen/PowerPC/spe.ll
@@ -1422,3 +1422,64 @@
 declare float @llvm.fma.f32(float, float, float) #1
 
 attributes #1 = { nounwind readnone speculatable willreturn }
+
+%struct.a = type { float, float }
+
+define void @d(%struct.a* %e, %struct.a* %f) {
+; CHECK-LABEL: d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stw 0, 4(1)
+; CHECK-NEXT:    stwu 1, -48(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 4
+; CHECK-NEXT:    .cfi_offset r29, -12
+; CHECK-NEXT:    .cfi_offset r30, -8
+; CHECK-NEXT:    .cfi_offset r29, -40
+; CHECK-NEXT:    .cfi_offset r30, -32
+; CHECK-NEXT:    lwz 4, 0(4)
+; CHECK-NEXT:    lwz 3, 0(3)
+; CHECK-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
+; CHECK-NEXT:    evstdd 29, 8(1) # 8-byte Folded Spill
+; CHECK-NEXT:    efdcfs 29, 4
+; CHECK-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT:    mr 4, 29
+; CHECK-NEXT:    evstdd 30, 16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    efdcfs 30, 3
+; CHECK-NEXT:    evmergehi 3, 29, 29
+; CHECK-NEXT:    mtctr 3
+; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT:    bctrl
+; CHECK-NEXT:    evmergehi 3, 30, 30
+; CHECK-NEXT:    mr 4, 30
+; CHECK-NEXT:    mtctr 3
+; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT:    bctrl
+; CHECK-NEXT:    li 3, .LCPI58_0@l
+; CHECK-NEXT:    lis 4, .LCPI58_0@ha
+; CHECK-NEXT:    evlddx 3, 4, 3
+; CHECK-NEXT:    evldd 30, 16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    efdmul 3, 29, 3
+; CHECK-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
+; CHECK-NEXT:    efscfd 3, 3
+; CHECK-NEXT:    stw 3, 0(3)
+; CHECK-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
+; CHECK-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
+; CHECK-NEXT:    lwz 0, 52(1)
+; CHECK-NEXT:    addi 1, 1, 48
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  %0 = getelementptr %struct.a, %struct.a* %f, i32 0, i32 0
+  %1 = load float, float* undef
+  %conv = fpext float %1 to double
+  %2 = load float, float* %0
+  %g = fpext float %2 to double
+  %3 = call i32 undef(double %g)
+  %h = call i32 undef(double %conv)
+  %n = sitofp i32 %3 to double
+  %k = fmul double %g, %n
+  %l = fptrunc double %k to float
+  store float %l, float* undef
+  ret void
+}