@@ -127,6 +127,10 @@ class X86FlagsCopyLoweringPass : public MachineFunctionPass {
127
127
MachineInstr &JmpI, CondRegArray &CondRegs);
128
128
void rewriteCopy (MachineInstr &MI, MachineOperand &FlagUse,
129
129
MachineInstr &CopyDefI);
130
+ void rewriteSetCarryExtended (MachineBasicBlock &TestMBB,
131
+ MachineBasicBlock::iterator TestPos,
132
+ DebugLoc TestLoc, MachineInstr &SetBI,
133
+ MachineOperand &FlagUse, CondRegArray &CondRegs);
130
134
void rewriteSetCC (MachineBasicBlock &TestMBB,
131
135
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
132
136
MachineInstr &SetCCI, MachineOperand &FlagUse,
@@ -512,8 +516,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
512
516
} else if (MI.getOpcode () == TargetOpcode::COPY) {
513
517
rewriteCopy (MI, *FlagUse, CopyDefI);
514
518
} else {
515
- // We assume that arithmetic instructions that use flags also def
516
- // them.
519
+ // We assume all other instructions that use flags also def them.
517
520
assert (MI.findRegisterDefOperand (X86::EFLAGS) &&
518
521
" Expected a def of EFLAGS for this instruction!" );
519
522
@@ -525,7 +528,23 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
525
528
// logic.
526
529
FlagsKilled = true ;
527
530
528
- rewriteArithmetic (TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
531
+ switch (MI.getOpcode ()) {
532
+ case X86::SETB_C8r:
533
+ case X86::SETB_C16r:
534
+ case X86::SETB_C32r:
535
+ case X86::SETB_C64r:
536
+ // Use custom lowering for arithmetic that is merely extending the
537
+ // carry flag. We model this as the SETB_C* pseudo instructions.
538
+ rewriteSetCarryExtended (TestMBB, TestPos, TestLoc, MI, *FlagUse,
539
+ CondRegs);
540
+ break ;
541
+
542
+ default :
543
+ // Generically handle remaining uses as arithmetic instructions.
544
+ rewriteArithmetic (TestMBB, TestPos, TestLoc, MI, *FlagUse,
545
+ CondRegs);
546
+ break ;
547
+ }
529
548
break ;
530
549
}
531
550
@@ -753,6 +772,126 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
753
772
MI.eraseFromParent ();
754
773
}
755
774
775
+ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended (
776
+ MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
777
+ DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
778
+ CondRegArray &CondRegs) {
779
+ // This routine is only used to handle pseudos for setting a register to zero
780
+ // or all ones based on CF. This is essentially the sign extended from 1-bit
781
+ // form of SETB and modeled with the SETB_C* pseudos. They require special
782
+ // handling as they aren't normal SETcc instructions and are lowered to an
783
+ // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
784
+ // they are only provided in reg-defining forms. A complicating factor is that
785
+ // they can define many different register widths.
786
+ assert (SetBI.getOperand (0 ).isReg () &&
787
+ " Cannot have a non-register defined operand to this variant of SETB!" );
788
+
789
+ // Little helper to do the common final step of replacing the register def'ed
790
+ // by this SETB instruction with a new register and removing the SETB
791
+ // instruction.
792
+ auto RewriteToReg = [&](unsigned Reg) {
793
+ MRI->replaceRegWith (SetBI.getOperand (0 ).getReg (), Reg);
794
+ SetBI.eraseFromParent ();
795
+ };
796
+
797
+ // Grab the register class used for this particular instruction.
798
+ auto &SetBRC = *MRI->getRegClass (SetBI.getOperand (0 ).getReg ());
799
+
800
+ MachineBasicBlock &MBB = *SetBI.getParent ();
801
+ auto SetPos = SetBI.getIterator ();
802
+ auto SetLoc = SetBI.getDebugLoc ();
803
+
804
+ auto AdjustReg = [&](unsigned Reg) {
805
+ auto &OrigRC = *MRI->getRegClass (Reg);
806
+ if (&OrigRC == &SetBRC)
807
+ return Reg;
808
+
809
+ unsigned NewReg;
810
+
811
+ int OrigRegSize = TRI->getRegSizeInBits (OrigRC) / 8 ;
812
+ int TargetRegSize = TRI->getRegSizeInBits (SetBRC) / 8 ;
813
+ assert (OrigRegSize <= 8 && " No GPRs larger than 64-bits!" );
814
+ assert (TargetRegSize <= 8 && " No GPRs larger than 64-bits!" );
815
+ int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
816
+ X86::NoSubRegister, X86::sub_32bit};
817
+
818
+ // If the original size is smaller than the target *and* is smaller than 4
819
+ // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
820
+ // to maximize the chance of being able to CSE that operation and to avoid
821
+ // partial dependency stalls extending to 2-bytes.
822
+ if (OrigRegSize < TargetRegSize && OrigRegSize < 4 ) {
823
+ NewReg = MRI->createVirtualRegister (&X86::GR32RegClass);
824
+ BuildMI (MBB, SetPos, SetLoc, TII->get (X86::MOVZX32rr8), NewReg)
825
+ .addReg (Reg);
826
+ if (&SetBRC == &X86::GR32RegClass)
827
+ return NewReg;
828
+ Reg = NewReg;
829
+ OrigRegSize = 4 ;
830
+ }
831
+
832
+ NewReg = MRI->createVirtualRegister (&SetBRC);
833
+ if (OrigRegSize < TargetRegSize) {
834
+ BuildMI (MBB, SetPos, SetLoc, TII->get (TargetOpcode::SUBREG_TO_REG),
835
+ NewReg)
836
+ .addImm (0 )
837
+ .addReg (Reg)
838
+ .addImm (SubRegIdx[OrigRegSize]);
839
+ } else if (OrigRegSize > TargetRegSize) {
840
+ BuildMI (MBB, SetPos, SetLoc, TII->get (TargetOpcode::EXTRACT_SUBREG),
841
+ NewReg)
842
+ .addReg (Reg)
843
+ .addImm (SubRegIdx[TargetRegSize]);
844
+ } else {
845
+ BuildMI (MBB, SetPos, SetLoc, TII->get (TargetOpcode::COPY), NewReg)
846
+ .addReg (Reg);
847
+ }
848
+ return NewReg;
849
+ };
850
+
851
+ unsigned &CondReg = CondRegs[X86::COND_B];
852
+ if (!CondReg)
853
+ CondReg = promoteCondToReg (TestMBB, TestPos, TestLoc, X86::COND_B);
854
+
855
+ // Adjust the condition to have the desired register width by zero-extending
856
+ // as needed.
857
+ // FIXME: We should use a better API to avoid the local reference and using a
858
+ // different variable here.
859
+ unsigned ExtCondReg = AdjustReg (CondReg);
860
+
861
+ // Now we need to turn this into a bitmask. We do this by subtracting it from
862
+ // zero.
863
+ unsigned ZeroReg = MRI->createVirtualRegister (&X86::GR32RegClass);
864
+ BuildMI (MBB, SetPos, SetLoc, TII->get (X86::MOV32r0), ZeroReg);
865
+ ZeroReg = AdjustReg (ZeroReg);
866
+
867
+ unsigned Sub;
868
+ switch (SetBI.getOpcode ()) {
869
+ case X86::SETB_C8r:
870
+ Sub = X86::SUB8rr;
871
+ break ;
872
+
873
+ case X86::SETB_C16r:
874
+ Sub = X86::SUB16rr;
875
+ break ;
876
+
877
+ case X86::SETB_C32r:
878
+ Sub = X86::SUB32rr;
879
+ break ;
880
+
881
+ case X86::SETB_C64r:
882
+ Sub = X86::SUB64rr;
883
+ break ;
884
+
885
+ default :
886
+ llvm_unreachable (" Invalid SETB_C* opcode!" );
887
+ }
888
+ unsigned ResultReg = MRI->createVirtualRegister (&SetBRC);
889
+ BuildMI (MBB, SetPos, SetLoc, TII->get (Sub), ResultReg)
890
+ .addReg (ZeroReg)
891
+ .addReg (ExtCondReg);
892
+ return RewriteToReg (ResultReg);
893
+ }
894
+
756
895
void X86FlagsCopyLoweringPass::rewriteSetCC (MachineBasicBlock &TestMBB,
757
896
MachineBasicBlock::iterator TestPos,
758
897
DebugLoc TestLoc,
0 commit comments