Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2364,6 +2364,14 @@
     return nullptr;
   }

+  /// Given a set of interconnected phis of type 'From' that are loaded/stored
+  /// or bitcast to type 'To', return true if the set should be converted to
+  /// 'To'.
+  virtual bool shouldConvertPhiType(Type *From, Type *To) const {
+    return (From->isIntegerTy() || From->isFloatingPointTy()) &&
+           (To->isIntegerTy() || To->isFloatingPointTy());
+  }
+
   /// Returns true if the opcode is a commutative binary operation.
   virtual bool isCommutativeBinOp(unsigned Opcode) const {
     // FIXME: This should get its info from the td file.
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -407,6 +407,9 @@
                           unsigned CreatedInstsCost = 0);
     bool mergeSExts(Function &F);
     bool splitLargeGEPOffsets();
+    bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
+                         SmallPtrSetImpl<Instruction *> &DeletedInstrs);
+    bool optimizePhiTypes(Function &F);
     bool performAddressTypePromotion(
         Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
@@ -515,6 +518,7 @@
       MadeChange |= mergeSExts(F);
       if (!LargeOffsetGEPMap.empty())
         MadeChange |= splitLargeGEPOffsets();
+      MadeChange |= optimizePhiTypes(F);

       if (MadeChange)
         eliminateFallThrough(F);
@@ -5717,6 +5721,152 @@
   return Changed;
 }

+bool CodeGenPrepare::optimizePhiType(
+    PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
+    SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
+  // We are looking for a collection of interconnected phi nodes that together
+  // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
+  // are of the same type. Convert the whole set of nodes to the type of the
+  // bitcast.
+  Type *PhiTy = I->getType();
+  Type *ConvertTy = nullptr;
+  if (Visited.count(I) ||
+      (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
+    return false;
+
+  SmallVector<Instruction *, 4> Worklist;
+  Worklist.push_back(cast<Instruction>(I));
+  SmallPtrSet<PHINode *, 4> PhiNodes;
+  PhiNodes.insert(I);
+  Visited.insert(I);
+  SmallPtrSet<Instruction *, 4> Defs;
+  SmallPtrSet<Instruction *, 4> Uses;
+
+  while (!Worklist.empty()) {
+    Instruction *II = Worklist.pop_back_val();
+
+    if (auto *Phi = dyn_cast<PHINode>(II)) {
+      // Handle Defs, which might also be PHI's
+      for (Value *V : Phi->incoming_values()) {
+        if (auto *OpPhi = dyn_cast<PHINode>(V)) {
+          if (!PhiNodes.count(OpPhi)) {
+            if (Visited.count(OpPhi))
+              return false;
+            PhiNodes.insert(OpPhi);
+            Visited.insert(OpPhi);
+            Worklist.push_back(OpPhi);
+          }
+        } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
+          if (!Defs.count(OpLoad)) {
+            Defs.insert(OpLoad);
+            Worklist.push_back(OpLoad);
+          }
+        } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
+          if (!Defs.count(OpEx)) {
+            Defs.insert(OpEx);
+            Worklist.push_back(OpEx);
+          }
+        } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
+          if (!ConvertTy)
+            ConvertTy = OpBC->getOperand(0)->getType();
+          if (OpBC->getOperand(0)->getType() != ConvertTy)
+            return false;
+          if (!Defs.count(OpBC)) {
+            Defs.insert(OpBC);
+            Worklist.push_back(OpBC);
+          }
+        } else if (!isa<UndefValue>(V))
+          return false;
+      }
+    }
+
+    // Handle uses which might also be phi's
+    for (User *V : II->users()) {
+      if (auto *OpPhi = dyn_cast<PHINode>(V)) {
+        if (!PhiNodes.count(OpPhi)) {
+          if (Visited.count(OpPhi))
+            return false;
+          PhiNodes.insert(OpPhi);
+          Visited.insert(OpPhi);
+          Worklist.push_back(OpPhi);
+        }
+      } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
+        if (OpStore->getOperand(0) != II)
+          return false;
+        Uses.insert(OpStore);
+      } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
+        if (!ConvertTy)
+          ConvertTy = OpBC->getType();
+        if (OpBC->getType() != ConvertTy)
+          return false;
+        Uses.insert(OpBC);
+      } else
+        return false;
+    }
+  }
+
+  if (!ConvertTy || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Converting " << *I
+                    << "\n and connected nodes to "
+                    << *ConvertTy << "\n");
+
+  // Create all the new phi nodes of the new type, and bitcast any loads to the
+  // correct type.
+  ValueToValueMap ValMap;
+  ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy);
+  for (Instruction *D : Defs) {
+    if (isa<BitCastInst>(D))
+      ValMap[D] = D->getOperand(0);
+    else
+      ValMap[D] =
+          new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
+  }
+  for (PHINode *Phi : PhiNodes)
+    ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
+                                  Phi->getName() + ".tc", Phi);
+  // Pipe together all the PhiNodes.
+  for (PHINode *Phi : PhiNodes) {
+    PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
+    for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
+      NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
+                          Phi->getIncomingBlock(i));
+  }
+  // And finally pipe up the stores and bitcasts
+  for (Instruction *U : Uses) {
+    if (isa<BitCastInst>(U)) {
+      DeletedInstrs.insert(U);
+      U->replaceAllUsesWith(ValMap[U->getOperand(0)]);
+    } else
+      U->setOperand(0,
+                    new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
+  }
+
+  // Save the removed phis to be deleted later.
+  for (PHINode *Phi : PhiNodes)
+    DeletedInstrs.insert(Phi);
+  return true;
+}
+
+bool CodeGenPrepare::optimizePhiTypes(Function &F) {
+  bool Changed = false;
+  SmallPtrSet<PHINode *, 4> Visited;
+  SmallPtrSet<Instruction *, 4> DeletedInstrs;
+
+  // Attempt to optimize all the phis in the functions to the correct type.
+  for (auto &BB : F)
+    for (auto &Phi : BB.phis())
+      Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
+
+  // Remove any old phi's that have been converted.
+  for (auto *I : DeletedInstrs) {
+    I->replaceAllUsesWith(UndefValue::get(I->getType()));
+    I->eraseFromParent();
+  }
+
+  return Changed;
+}
+
 /// Return true, if an ext(load) can be formed from an extension in
 /// \p MovedExts.
 bool CodeGenPrepare::canFormExtLd(
Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -1181,6 +1181,7 @@
     bool shouldSinkOperands(Instruction *I,
                             SmallVectorImpl<Use *> &Ops) const override;
+    bool shouldConvertPhiType(Type *From, Type *To) const override;

     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
     /// extend node) is profitable.
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30740,6 +30740,12 @@
   return false;
 }

+bool X86TargetLowering::shouldConvertPhiType(Type *From, Type *To) const {
+  if (!Subtarget.is64Bit())
+    return false;
+  return TargetLowering::shouldConvertPhiType(From, To);
+}
+
 bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
   if (isa<MaskedLoadSDNode>(ExtVal.getOperand(0)))
     return false;
Index: llvm/test/CodeGen/AArch64/convertphitype.ll
===================================================================
--- llvm/test/CodeGen/AArch64/convertphitype.ll
+++ llvm/test/CodeGen/AArch64/convertphitype.ll
@@ -11,14 +11,15 @@
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC:%.*]] = bitcast i32 [[LD]] to float
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
-; CHECK-NEXT:    ret float [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ]
+; CHECK-NEXT:    ret float [[PHI_TC]]
 ;
entry: %cmp15 = icmp sgt i32 %n, 0 @@ -45,11 +46,11 @@ ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ undef, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -73,11 +74,11 @@ ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[FB]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -102,10 +103,12 @@ ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[FB]], [[ENTRY:%.*]] ] -; CHECK-NEXT: store i32 [[PHI]], i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC:%.*]] = bitcast float [[PHI_TC]] to i32 +; CHECK-NEXT: store i32 [[BC]], i32* [[D:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -130,14 +133,15 @@ ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] ; 
CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load double, double* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast double [[LS]] to i64 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LD:%.*]] = load double, double* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast double [[LD]] to i64 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi double [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast double [[PHI]] to i64 -; CHECK-NEXT: ret i64 [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi i64 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ] +; CHECK-NEXT: ret i64 [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -164,14 +168,15 @@ ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load float, float* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast float [[LS]] to i32 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LD:%.*]] = load float, float* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast float [[LD]] to i32 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast float [[PHI]] to i32 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi i32 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ] +; CHECK-NEXT: ret i32 [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -198,14 +203,15 @@ ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load half, half* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast half [[LS]] to i16 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LD:%.*]] = load half, half* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast half [[LD]] to i16 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi half [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] -; 
CHECK-NEXT: [[B:%.*]] = bitcast half [[PHI]] to i16 -; CHECK-NEXT: ret i16 [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi i16 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ] +; CHECK-NEXT: ret i16 [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -232,14 +238,15 @@ ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load fp128, fp128* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast fp128 [[LS]] to i128 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LD:%.*]] = load fp128, fp128* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast fp128 [[LD]] to i128 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi fp128 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast fp128 [[PHI]] to i128 -; CHECK-NEXT: ret i128 [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi i128 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ] +; CHECK-NEXT: ret i128 [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -298,18 +305,19 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LPHI:%.*]] = phi i32 [ [[LS]], [[ENTRY]] ], [ [[LD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LD]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LPHI_TC:%.*]] = phi float [ [[LS_BC]], [[ENTRY]] ], [ [[LD_BC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC]] = bitcast i32 [[LD]] to float ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = 
phi i32 [ undef, [[ENTRY]] ], [ [[LPHI]], [[LOOP]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ [[LPHI_TC]], [[LOOP]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; entry: %cmp15 = icmp sgt i64 %n, 0 @@ -370,19 +378,20 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LPHI:%.*]] = phi i32 [ [[LS]], [[ENTRY]] ], [ [[LD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LPHI2:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[LPHI]], [[LOOP]] ] -; CHECK-NEXT: [[LD]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LPHI_TC:%.*]] = phi float [ [[LS_BC]], [[ENTRY]] ], [ [[LD_BC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[LPHI2_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ [[LPHI_TC]], [[LOOP]] ] +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC]] = bitcast i32 [[LD]] to float ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[LPHI2]], [[LOOP]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ [[LPHI2_TC]], [[LOOP]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; entry: %cmp15 = icmp sgt i64 %n, 0 @@ -409,31 +418,33 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 1 ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br i1 
[[CMP]], label [[THEN:%.*]], label [[IFEND:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast i32 [[LD]] to float ; CHECK-NEXT: br label [[IFEND]] ; CHECK: ifend: -; CHECK-NEXT: [[PHI1:%.*]] = phi i32 [ [[LD]], [[THEN]] ], [ [[LS]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[PHI1_TC:%.*]] = phi float [ [[LD_BC]], [[THEN]] ], [ [[LS_BC]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[N]], 0 ; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[IFEND]] ], [ [[IV_NEXT:%.*]], [[LOOPEND:%.*]] ] -; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ [[PHI1]], [[IFEND]] ], [ [[PHI3:%.*]], [[LOOPEND]] ] +; CHECK-NEXT: [[PHI2_TC:%.*]] = phi float [ [[PHI1_TC]], [[IFEND]] ], [ [[PHI3_TC:%.*]], [[LOOPEND]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N]], 1 ; CHECK-NEXT: br i1 [[TMP0]], label [[LOOPTHEN:%.*]], label [[LOOPEND]] ; CHECK: loopthen: ; CHECK-NEXT: [[LL:%.*]] = load i32, i32* [[D]], align 4 +; CHECK-NEXT: [[LL_BC:%.*]] = bitcast i32 [[LL]] to float ; CHECK-NEXT: br label [[LOOPEND]] ; CHECK: loopend: -; CHECK-NEXT: [[PHI3]] = phi i32 [ [[LL]], [[LOOPTHEN]] ], [ [[PHI2]], [[LOOP]] ] +; CHECK-NEXT: [[PHI3_TC]] = phi float [ [[LL_BC]], [[LOOPTHEN]] ], [ [[PHI2_TC]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[PHI1]], [[IFEND]] ], [ [[PHI3]], [[LOOPEND]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[PHI1_TC]], [[IFEND]] ], [ [[PHI3_TC]], [[LOOPEND]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; entry: %cmp = icmp eq i64 %n, 1 Index: llvm/test/CodeGen/Thumb2/mve-float32regloops.ll =================================================================== --- 
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1649,156 +1649,135 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #88 -; CHECK-NEXT: sub sp, #88 -; CHECK-NEXT: ldrd r12, r10, [r0] -; CHECK-NEXT: @ implicit-def: $s2 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: ldrd r6, r9, [r0] ; CHECK-NEXT: and r7, r3, #3 -; CHECK-NEXT: ldr.w r11, [r0, #8] -; CHECK-NEXT: lsrs r0, r3, #2 -; CHECK-NEXT: str r0, [sp, #60] @ 4-byte Spill -; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: str r2, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: ldr r0, [r0, #8] +; CHECK-NEXT: lsrs r3, r3, #2 +; CHECK-NEXT: @ implicit-def: $r12 +; CHECK-NEXT: str r7, [sp] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: b .LBB19_3 ; CHECK-NEXT: .LBB19_1: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: vmov.f32 s14, s7 -; CHECK-NEXT: ldr r2, [sp, #56] @ 4-byte Reload -; CHECK-NEXT: vmov.f32 s0, s10 -; CHECK-NEXT: vmov.f32 s7, s6 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r7, r5 +; CHECK-NEXT: mov r4, r11 +; CHECK-NEXT: mov r8, r10 ; CHECK-NEXT: .LBB19_2: @ %if.end69 ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: vstr s8, [r10] -; CHECK-NEXT: subs.w r12, r12, #1 -; CHECK-NEXT: vstr s0, [r10, #4] -; CHECK-NEXT: add.w r11, r11, #128 -; CHECK-NEXT: vstr s14, [r10, #8] +; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: strd r7, r4, [r9] +; CHECK-NEXT: subs r6, #1 +; CHECK-NEXT: strd r3, r8, [r9, #8] +; CHECK-NEXT: add.w r9, r9, #16 ; CHECK-NEXT: mov r1, r2 -; CHECK-NEXT: vstr s7, [r10, #12] -; CHECK-NEXT: add.w r10, r10, #16 ; CHECK-NEXT: beq.w .LBB19_13 ; CHECK-NEXT: .LBB19_3: @ %do.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; 
CHECK-NEXT: @ Child Loop BB19_5 Depth 2 -; CHECK-NEXT: vldr s7, [r10, #8] -; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: ldr r0, [sp, #60] @ 4-byte Reload -; CHECK-NEXT: vldr s8, [r10] -; CHECK-NEXT: vldr s10, [r10, #4] -; CHECK-NEXT: vldr s6, [r10, #12] -; CHECK-NEXT: wls lr, r0, .LBB19_6 +; CHECK-NEXT: str r6, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: ldrd r5, r11, [r9] +; CHECK-NEXT: ldrd r8, r10, [r9, #8] +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: wls lr, r2, .LBB19_6 ; CHECK-NEXT: @ %bb.4: @ %while.body.lr.ph ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: ldrd r5, lr, [sp, #56] @ 8-byte Folded Reload +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r4, r11 +; CHECK-NEXT: ldr.w lr, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: .LBB19_5: @ %while.body ; CHECK-NEXT: @ Parent Loop BB19_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: vldr s8, [r1, #12] -; CHECK-NEXT: vldrw.u32 q0, [r11, #112] -; CHECK-NEXT: vmov r0, s10 -; CHECK-NEXT: vldr s10, [r1, #8] -; CHECK-NEXT: vmov r7, s7 -; CHECK-NEXT: vmov r9, s6 -; CHECK-NEXT: vldrw.u32 q1, [r11] -; CHECK-NEXT: vstrw.32 q0, [sp, #64] @ 16-byte Spill -; CHECK-NEXT: vmov r8, s8 -; CHECK-NEXT: vldrw.u32 q0, [r11, #16] -; CHECK-NEXT: ldr r6, [r1, #4] -; CHECK-NEXT: vldrw.u32 q7, [r11, #32] -; CHECK-NEXT: vmul.f32 q1, q1, r8 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: vldrw.u32 q3, [r11, #48] -; CHECK-NEXT: vfma.f32 q1, q0, r3 -; CHECK-NEXT: ldr r3, [r1] -; CHECK-NEXT: vfma.f32 q1, q7, r6 -; CHECK-NEXT: vldrw.u32 q6, [r11, #64] -; CHECK-NEXT: vfma.f32 q1, q3, r3 -; CHECK-NEXT: vldrw.u32 q5, [r11, #80] -; CHECK-NEXT: vfma.f32 q1, q6, r4 -; CHECK-NEXT: vldrw.u32 q4, [r11, #96] -; CHECK-NEXT: vfma.f32 q1, q5, r0 -; CHECK-NEXT: vldrw.u32 q0, [sp, #64] @ 16-byte Reload -; CHECK-NEXT: vfma.f32 q1, q4, r7 +; CHECK-NEXT: ldr r5, [r1, #12] +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: 
vldrw.u32 q6, [r0, #16] +; CHECK-NEXT: ldm.w r1, {r2, r7, r11} +; CHECK-NEXT: vmul.f32 q1, q1, r5 +; CHECK-NEXT: vldrw.u32 q7, [r0, #32] +; CHECK-NEXT: vfma.f32 q1, q6, r11 +; CHECK-NEXT: vldrw.u32 q4, [r0, #48] +; CHECK-NEXT: vfma.f32 q1, q7, r7 +; CHECK-NEXT: vldrw.u32 q5, [r0, #64] +; CHECK-NEXT: vfma.f32 q1, q4, r2 +; CHECK-NEXT: vldrw.u32 q3, [r0, #80] +; CHECK-NEXT: vfma.f32 q1, q5, r3 +; CHECK-NEXT: vldrw.u32 q2, [r0, #96] +; CHECK-NEXT: vfma.f32 q1, q3, r4 +; CHECK-NEXT: vldrw.u32 q0, [r0, #112] +; CHECK-NEXT: vfma.f32 q1, q2, r8 ; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: vfma.f32 q1, q0, r9 -; CHECK-NEXT: vmov.f32 s2, s8 -; CHECK-NEXT: vstrb.8 q1, [r5], #16 +; CHECK-NEXT: vfma.f32 q1, q0, r10 +; CHECK-NEXT: vmov r10, s6 +; CHECK-NEXT: vstrb.8 q1, [r6], #16 +; CHECK-NEXT: vmov r8, s7 +; CHECK-NEXT: mov r4, r11 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r12, r5 ; CHECK-NEXT: le lr, .LBB19_5 ; CHECK-NEXT: .LBB19_6: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: beq .LBB19_1 ; CHECK-NEXT: @ %bb.7: @ %if.then ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: vldr s24, [r1] -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vldr s0, [r1, #4] -; CHECK-NEXT: vldrw.u32 q3, [r11] -; CHECK-NEXT: vldr s3, [r1, #12] -; CHECK-NEXT: vldrw.u32 q4, [r11, #32] -; CHECK-NEXT: vldr s1, [r1, #8] -; CHECK-NEXT: vmov r1, s10 -; CHECK-NEXT: vldrw.u32 q2, [r11, #96] -; CHECK-NEXT: vmov r6, s3 -; CHECK-NEXT: vmul.f32 q3, q3, r6 -; CHECK-NEXT: vmov r6, s1 -; CHECK-NEXT: vstrw.32 q2, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vldrw.u32 q2, [r11, #112] -; CHECK-NEXT: vldrw.u32 q5, [r11, #48] -; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: vstrw.32 q2, [sp, #64] @ 16-byte Spill -; CHECK-NEXT: vldrw.u32 q2, [r11, #80] -; CHECK-NEXT: vldrw.u32 q7, [r11, #64] -; CHECK-NEXT: vmov r3, s24 -; CHECK-NEXT: vstrw.32 q2, [sp, #16] 
@ 16-byte Spill -; CHECK-NEXT: vldrw.u32 q2, [r11, #16] -; CHECK-NEXT: vmov r2, s7 -; CHECK-NEXT: cmp r7, #1 -; CHECK-NEXT: vfma.f32 q3, q2, r6 -; CHECK-NEXT: vldrw.u32 q2, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vfma.f32 q3, q4, r4 -; CHECK-NEXT: vmov lr, s6 -; CHECK-NEXT: vfma.f32 q3, q5, r3 -; CHECK-NEXT: vfma.f32 q3, q7, r0 -; CHECK-NEXT: vfma.f32 q3, q2, r1 -; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vfma.f32 q3, q2, r2 -; CHECK-NEXT: vldrw.u32 q2, [sp, #64] @ 16-byte Reload -; CHECK-NEXT: vfma.f32 q3, q2, lr +; CHECK-NEXT: ldrd lr, r4, [r1] +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: ldrd r7, r1, [r1, #8] +; CHECK-NEXT: vldrw.u32 q6, [r0, #16] +; CHECK-NEXT: vldrw.u32 q7, [r0, #32] +; CHECK-NEXT: vldrw.u32 q4, [r0, #48] +; CHECK-NEXT: vmul.f32 q0, q0, r1 +; CHECK-NEXT: vldrw.u32 q5, [r0, #64] +; CHECK-NEXT: vfma.f32 q0, q6, r7 +; CHECK-NEXT: vldrw.u32 q3, [r0, #80] +; CHECK-NEXT: vfma.f32 q0, q7, r4 +; CHECK-NEXT: vldrw.u32 q2, [r0, #96] +; CHECK-NEXT: vfma.f32 q0, q4, lr +; CHECK-NEXT: vldrw.u32 q1, [r0, #112] +; CHECK-NEXT: vfma.f32 q0, q5, r5 +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: vfma.f32 q0, q3, r11 +; CHECK-NEXT: vfma.f32 q0, q2, r8 +; CHECK-NEXT: vfma.f32 q0, q1, r10 +; CHECK-NEXT: vmov r5, s0 ; CHECK-NEXT: bne .LBB19_9 ; CHECK-NEXT: @ %bb.8: @ %if.then58 ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: vstr s12, [r5] -; CHECK-NEXT: vmov.f32 s8, s24 -; CHECK-NEXT: vmov.f32 s0, s2 -; CHECK-NEXT: vmov.f32 s14, s12 -; CHECK-NEXT: b .LBB19_11 +; CHECK-NEXT: str r5, [r6] +; CHECK-NEXT: mov r7, lr +; CHECK-NEXT: mov r4, r12 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: b .LBB19_12 ; CHECK-NEXT: .LBB19_9: @ %if.else ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: cmp r7, #2 -; CHECK-NEXT: vstmia r5, {s12, s13} -; CHECK-NEXT: bne .LBB19_12 +; CHECK-NEXT: vmov r8, s1 +; CHECK-NEXT: cmp r2, #2 +; CHECK-NEXT: vstr s1, [r6, #4] +; CHECK-NEXT: str r5, [r6] +; CHECK-NEXT: bne .LBB19_11 ; CHECK-NEXT: @ %bb.10: 
@ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: vmov.f32 s8, s0 -; CHECK-NEXT: vmov.f32 s14, s13 -; CHECK-NEXT: vmov.f32 s0, s24 -; CHECK-NEXT: vmov.f32 s7, s12 -; CHECK-NEXT: .LBB19_11: @ %if.end69 +; CHECK-NEXT: mov r7, r4 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r4, lr +; CHECK-NEXT: mov r8, r5 +; CHECK-NEXT: b .LBB19_12 +; CHECK-NEXT: .LBB19_11: @ %if.else64 ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: vmov.f32 s2, s3 -; CHECK-NEXT: ldr r2, [sp, #56] @ 4-byte Reload -; CHECK-NEXT: b .LBB19_2 -; CHECK-NEXT: .LBB19_12: @ %if.else64 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: vstr s2, [r6, #8] +; CHECK-NEXT: .LBB19_12: @ %if.end69 ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: vmov.f32 s7, s13 -; CHECK-NEXT: ldr r2, [sp, #56] @ 4-byte Reload -; CHECK-NEXT: vmov.f32 s2, s3 -; CHECK-NEXT: vstr s14, [r5, #8] -; CHECK-NEXT: vmov.f32 s8, s1 +; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: b .LBB19_2 ; CHECK-NEXT: .LBB19_13: @ %do.end -; CHECK-NEXT: add sp, #88 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} Index: llvm/test/CodeGen/X86/atomicf128.ll =================================================================== --- llvm/test/CodeGen/X86/atomicf128.ll +++ llvm/test/CodeGen/X86/atomicf128.ll @@ -11,20 +11,15 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movq _fsc128@{{.*}}(%rip), %rsi -; CHECK-NEXT: movaps (%rsi), %xmm1 +; CHECK-NEXT: movq (%rsi), %rax +; CHECK-NEXT: movq 8(%rsi), %rdx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_1: ## %atomicrmw.start ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rbx ; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx -; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: lock cmpxchg16b (%rsi) 
-; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; CHECK-NEXT: jne LBB0_1 ; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end ; CHECK-NEXT: popq %rbx Index: llvm/test/CodeGen/X86/convertphitype.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/convertphitype.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -codegenprepare %s -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +define float @convphi1(i32 *%s, i32 *%d, i32 %n) { +; CHECK-LABEL: @convphi1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: else: +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] +; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float +; CHECK-NEXT: ret float [[B]] +; +entry: + %cmp15 = icmp sgt i32 %n, 0 + br i1 %cmp15, label %then, label %else + +then: + %ls = load i32, i32* %s, align 4 + br label %end + +else: + %ld = load i32, i32* %d, align 4 + br label %end + +end: + %phi = phi i32 [ %ls, %then ], [ %ld, %else ] + %b = bitcast i32 %phi to float + ret float %b +}