diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1252,7 +1252,6 @@ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); } else { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); @@ -10754,6 +10753,24 @@ return Op; if (Subtarget.isISA3_1()) { + // An f32 load feeding into a v4f32 insert_vector_elt is handled through a + // DAG Combine on P10 in order to allow this specific insert_vector_elt + // load pattern to utilize the refactored load and store infrastructure. + // First, we convert the f32 load into an i32 load. This is done in + // order to produce a v4i32 insert_vector_elt. The v4i32 insert_vector_elt + // is then bitcast back into a v4f32. + // VT represents the v4f32 insert_vector_elt, and V2 is the f32 load. 
+ if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32)) { + if (LoadSDNode *LDN = dyn_cast<LoadSDNode>(V2)) { + SDValue ConvertedLD = + DAG.getLoad(MVT::i32, dl, LDN->getChain(), LDN->getBasePtr(), + LDN->getPointerInfo(), LDN->getAlignment()); + SDValue ConvertedInsVecElt = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, V1, ConvertedLD, + Op.getOperand(2)); + return DAG.getBitcast(MVT::v4f32, ConvertedInsVecElt); + } + } if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64()) return SDValue(); // On P10, we have legal lowering for constant and variable indices for diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -2814,31 +2814,27 @@ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), (VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)), - (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load PDForm:$rA)), i64:$rB)), (VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load XForm:$rA)), i64:$rB)), (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>; def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)), (VINSDRX 
$vDi, InsertEltShift.Left3, (PLD memri34:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; let AddedComplexity = 400 in { // Immediate vector insert element foreach Idx = [0, 1, 2, 3] in { def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)), (VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)), - (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load PDForm:$rA)), Idx)), (VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load XForm:$rA)), Idx)), (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>; } foreach i = [0, 1] in @@ -2858,11 +2854,9 @@ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)), (VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)), - (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load PDForm:$rA)), i32:$rB)), (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (PLWZ memri34:$rA))>; - def: Pat<(v4f32(insertelt v4f32 : $vDi, (f32(load xaddr : $rA)), i32 : $rB)), + def: Pat<(v4f32(insertelt v4f32 : $vDi, (f32(load XForm : $rA)), i32 : $rB)), (VINSWLX v4f32 : $vDi, InsertEltShift.Left2, (LWZX memrr : $rA))>; } @@ -2879,20 +2873,18 @@ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), (VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)), - (VINSWLX $vDi, 
InsertEltShift.Sub32Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load PDForm:$rA)), i64:$rB)), (VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load XForm:$rA)), i64:$rB)), (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>; def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; } @@ -2902,13 +2894,10 @@ foreach Idx = [0, 1, 2, 3] in { def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))), (VINSW $vDi, !mul(Idx, 4), $rA)>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), - (Ty Idx))), - (VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load PDForm:$rA)), (Ty Idx))), (VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load XForm:$rA)), (Ty Idx))), (VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>; } diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll 
b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -419,10 +419,9 @@ ; ; CHECK-32-P10-LABEL: testFloat3: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: lis 6, 1 -; CHECK-32-P10-NEXT: slwi 4, 4, 2 -; CHECK-32-P10-NEXT: lwzx 6, 3, 6 +; CHECK-32-P10-NEXT: plwz 6, 65536(3), 0 ; CHECK-32-P10-NEXT: lwz 3, 0(3) +; CHECK-32-P10-NEXT: slwi 4, 4, 2 ; CHECK-32-P10-NEXT: vinswlx 2, 4, 6 ; CHECK-32-P10-NEXT: slwi 4, 5, 2 ; CHECK-32-P10-NEXT: vinswlx 2, 4, 3 @@ -559,8 +558,7 @@ ; ; CHECK-32-P10-LABEL: testFloatImm3: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: lis 4, 4 -; CHECK-32-P10-NEXT: lwzx 4, 3, 4 +; CHECK-32-P10-NEXT: plwz 4, 262144(3), 0 ; CHECK-32-P10-NEXT: lwz 3, 0(3) ; CHECK-32-P10-NEXT: vinsw 2, 4, 0 ; CHECK-32-P10-NEXT: vinsw 2, 3, 8