Index: lib/CodeGen/SelectionDAG/InstrEmitter.cpp =================================================================== --- lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -248,8 +248,11 @@ User->getOperand(2).getResNo() == i) { unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); - if (RegRC == RC) { + // Allow constraining the virtual register's class within reason, + // just like what AddRegisterOperand will allow. + const TargetRegisterClass *ConstrainedRC + = MRI->constrainRegClass(Reg, RC, MinRCSize); + if (ConstrainedRC) { VRBase = Reg; MIB.addReg(VRBase, RegState::Define); break; Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1791,7 +1791,8 @@ case VCCZ: { const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); - assert(MRI.getRegClass(FalseReg) == RC); + assert(MRI.getTargetRegisterInfo()->getCommonSubClass(RC, + MRI.getRegClass(FalseReg))); int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32; CondCycles = TrueCycles = FalseCycles = NumInsts; // ??? @@ -1805,7 +1806,8 @@ // with a vector one. 
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); - assert(MRI.getRegClass(FalseReg) == RC); + assert(MRI.getTargetRegisterInfo()->getCommonSubClass(RC, + MRI.getRegClass(FalseReg))); int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32; Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -793,10 +793,12 @@ case AMDGPU::VS_32RegClassID: case AMDGPU::SReg_32RegClassID: case AMDGPU::SReg_32_XM0RegClassID: + case AMDGPU::SReg_32_XM0_XEXECRegClassID: return 32; case AMDGPU::SGPR_64RegClassID: case AMDGPU::VS_64RegClassID: case AMDGPU::SReg_64RegClassID: + case AMDGPU::SReg_64_XEXECRegClassID: case AMDGPU::VReg_64RegClassID: return 64; case AMDGPU::VReg_96RegClassID: Index: test/CodeGen/AArch64/arm64-atomic.ll =================================================================== --- test/CodeGen/AArch64/arm64-atomic.ll +++ test/CodeGen/AArch64/arm64-atomic.ll @@ -20,18 +20,17 @@ define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 { ; CHECK-LABEL: val_compare_and_swap_from_load: +; CHECK-NEXT: mov x[[P:[0-9]+]], x0 ; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2] ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: -; CHECK-NEXT: ldaxr w[[RESULT:[0-9]+]], [x0] -; CHECK-NEXT: cmp w[[RESULT]], w1 +; CHECK-NEXT: ldaxr w0, [x[[P]]] +; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] -; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0] +; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x[[P]]] ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] -; CHECK-NEXT: mov x0, x[[RESULT]] ; CHECK-NEXT: ret ; CHECK-NEXT: [[FAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: mov x0, x[[RESULT]] ; CHECK-NEXT: ret %new = load i32, i32* %pnew %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire @@ -91,13 +90,14 @@ define i64 
@fetch_and_nand_64(i64* %p) #0 { ; CHECK-LABEL: fetch_and_nand_64: -; CHECK: mov x[[ADDR:[0-9]+]], x0 ; CHECK: [[TRYBB:.?LBB[0-9_]+]]: -; CHECK: ldaxr x[[DEST_REG:[0-9]+]], [x[[ADDR]]] +; CHECK: ldaxr x[[DEST_REG:[0-9]+]], [x0] ; CHECK: mvn w[[TMP_REG:[0-9]+]], w[[DEST_REG]] ; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], x[[TMP_REG]], #0xfffffffffffffff8 -; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]] +; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] ; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK: mov x0, x[[DEST_REG]] +; CHECK: ret %val = atomicrmw nand i64* %p, i64 7 acq_rel ret i64 %val @@ -119,12 +119,13 @@ define i64 @fetch_and_or_64(i64* %p) #0 { ; CHECK: fetch_and_or_64: -; CHECK: mov x[[ADDR:[0-9]+]], x0 ; CHECK: [[TRYBB:.?LBB[0-9_]+]]: -; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]] +; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x0] ; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7 -; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]] +; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] ; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK: mov x0, x[[ADDR:[0-9]+]] +; CHECK: ret %val = atomicrmw or i64* %p, i64 7 monotonic ret i64 %val } Index: test/CodeGen/AArch64/combine-comparisons-by-cse.ll =================================================================== --- test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -11,9 +11,11 @@ ; CHECK-LABEL: combine_gt_ge_10 ; CHECK: cmp ; CHECK: b.le -; CHECK: ret +; CHECK: b.ne +; CHECK-NEXT: b {{.?LBB[0-9_]+}} ; CHECK-NOT: cmp ; CHECK: b.lt +; CHECK: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, 10 @@ -48,9 +50,10 @@ ; CHECK-LABEL: combine_gt_lt_5 ; CHECK: cmp ; CHECK: b.le -; CHECK: ret +; CHECK: b {{.?LBB[0-9_]+}} ; CHECK-NOT: cmp ; CHECK: b.ge +; CHECK: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, 5 @@ -85,9 +88,11 @@ ; CHECK-LABEL: combine_lt_ge_5 ; CHECK: cmp 
; CHECK: b.ge -; CHECK: ret +; CHECK: b.ne +; CHECK-NEXT: b {{.?LBB[0-9_]+}} ; CHECK-NOT: cmp ; CHECK: b.gt +; CHECK: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, 5 @@ -122,9 +127,10 @@ ; CHECK-LABEL: combine_lt_gt_5 ; CHECK: cmp ; CHECK: b.ge -; CHECK: ret +; CHECK: b {{.?LBB[0-9_]+}} ; CHECK-NOT: cmp ; CHECK: b.le +; CHECK: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, 5 @@ -159,9 +165,10 @@ ; CHECK-LABEL: combine_gt_lt_n5 ; CHECK: cmn ; CHECK: b.le -; CHECK: ret +; CHECK: b {{.?LBB[0-9_]+}} ; CHECK-NOT: cmn ; CHECK: b.ge +; CHECK: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, -5 @@ -196,9 +203,10 @@ ; CHECK-LABEL: combine_lt_gt_n5 ; CHECK: cmn ; CHECK: b.ge -; CHECK: ret +; CHECK: b {{.?LBB[0-9_]+}} ; CHECK-NOT: cmn ; CHECK: b.le +; CHECK: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, -5 Index: test/CodeGen/ARM/2011-08-25-ldmia_ret.ll =================================================================== --- test/CodeGen/ARM/2011-08-25-ldmia_ret.ll +++ test/CodeGen/ARM/2011-08-25-ldmia_ret.ll @@ -42,7 +42,7 @@ ; If-convert the return ; CHECK: it ne ; Fold the CSR+return into a pop -; CHECK: pop {r4, r5, r7, pc} +; CHECK: pop {r4, r5, r6, r7, pc} sw.bb18: %call20 = tail call i32 @bar(i32 %in2) nounwind switch i32 %call20, label %sw.default56 [ Index: test/CodeGen/ARM/atomic-64bit.ll =================================================================== --- test/CodeGen/ARM/atomic-64bit.ll +++ test/CodeGen/ARM/atomic-64bit.ll @@ -79,8 +79,8 @@ ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]] ; CHECK-THUMB-LE-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]] -; CHECK-THUMB-BE-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB-BE-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB-BE-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -108,8 
+108,8 @@ ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]] ; CHECK-THUMB-LE-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]] -; CHECK-THUMB-BE-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB-BE-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB-BE-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -137,8 +137,8 @@ ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]] ; CHECK-THUMB-LE-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]] -; CHECK-THUMB-BE-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB-BE-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB-BE-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne Index: test/CodeGen/ARM/atomic-cmp.ll =================================================================== --- test/CodeGen/ARM/atomic-cmp.ll +++ test/CodeGen/ARM/atomic-cmp.ll @@ -1,17 +1,12 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin -verify-machineinstrs | FileCheck %s -check-prefix=ARM -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -verify-machineinstrs | FileCheck %s -check-prefix=T2 +; RUN: llc < %s -mtriple=armv7-apple-darwin -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -verify-machineinstrs | FileCheck %s ; rdar://8964854 define i8 @t(i8* %a, i8 %b, i8 %c) nounwind { -; ARM-LABEL: t: -; ARM: ldrexb -; ARM: strexb -; ARM: clrex - -; T2-LABEL: t: -; T2: strexb -; T2: ldrexb -; T2: clrex +; CHECK-LABEL: t: +; CHECK: ldrexb +; CHECK: strexb +; CHECK: clrex %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic %tmp1 = extractvalue { i8, i1 } %tmp0, 0 ret i8 %tmp1 Index: test/CodeGen/ARM/atomic-ops-v8.ll =================================================================== --- test/CodeGen/ARM/atomic-ops-v8.ll +++ 
test/CodeGen/ARM/atomic-ops-v8.ll @@ -1037,29 +1037,27 @@ ; CHECK-NOT: mcr ; CHECK-DAG: movw r[[ADDR:[0-9]+]], :lower16:var8 ; CHECK-DAG: movt r[[ADDR]], :upper16:var8 -; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM-NEXT: cmp r[[OLD]], r0 -; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] +; CHECK-NEXT: cmp r[[OLD]], r0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: %bb.2: ; As above, r1 is a reasonable guess. ; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-ARM: mov r0, r[[OLD]] +; CHECK: mov r0, r[[OLD]] ; CHECK: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: ; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK-ARM: mov r0, r[[OLD]] -; CHECK-ARM-NEXT: bx lr +; CHECK: mov r0, r[[OLD]] +; CHECK-NEXT: bx lr ret i8 %old } @@ -1071,21 +1069,19 @@ ; CHECK-NOT: mcr ; CHECK-DAG: movw r[[ADDR:[0-9]+]], :lower16:var16 ; CHECK-DAG: movt r[[ADDR]], :upper16:var16 -; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM-NEXT: cmp r[[OLD]], r0 -; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] +; CHECK-NEXT: cmp r[[OLD]], r0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: %bb.2: ; As above, r1 is a reasonable guess. 
; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-ARM: mov r0, r[[OLD]] +; CHECK: mov r0, r[[OLD]] ; CHECK: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: ; CHECK-NEXT: clrex Index: test/CodeGen/PowerPC/vsx.ll =================================================================== --- test/CodeGen/PowerPC/vsx.ll +++ test/CodeGen/PowerPC/vsx.ll @@ -259,10 +259,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test15 -; CHECK-FISL: xxlor 0, 34, 35 -; CHECK-FISL: xxlor 36, 0, 0 -; CHECK-FISL: xxlnor 0, 34, 35 -; CHECK-FISL: xxlor 34, 0, 0 +; CHECK-FISL: xxlor 36, 34, 35 +; CHECK-FISL: xxlnor 34, 34, 35 ; CHECK-FISL-NOT: lis ; CHECK-FISL-NOT: ori ; CHECK-FISL: li 3, -16 @@ -287,10 +285,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test16 -; CHECK-FISL: xxlor 0, 34, 35 -; CHECK-FISL: xxlor 36, 0, 0 -; CHECK-FISL: xxlnor 0, 34, 35 -; CHECK-FISL: xxlor 34, 0, 0 +; CHECK-FISL: xxlor 36, 34, 35 +; CHECK-FISL: xxlnor 34, 34, 35 ; CHECK-FISL-NOT: lis ; CHECK-FISL-NOT: ori ; CHECK-FISL: li 3, -16 @@ -335,10 +331,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test18 -; CHECK-FISL: xxlnor 0, 35, 35 -; CHECK-FISL: xxlor 36, 0, 0 -; CHECK-FISL: xxlandc 0, 34, 35 -; CHECK-FISL: xxlor 34, 0, 0 +; CHECK-FISL: xxlnor 36, 35, 35 +; CHECK-FISL: xxlandc 34, 34, 35 ; CHECK-FISL-NOT: lis ; CHECK-FISL-NOT: ori ; CHECK-FISL: li 3, -16 @@ -363,10 +357,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test19 -; CHECK-FISL: xxlnor 0, 35, 35 -; CHECK-FISL: xxlor 36, 0, 0 -; CHECK-FISL: xxlandc 0, 34, 35 -; CHECK-FISL: xxlor 34, 0, 0 +; CHECK-FISL: xxlnor 36, 35, 35 +; CHECK-FISL: xxlandc 34, 34, 35 ; CHECK-FISL-NOT: lis ; CHECK-FISL-NOT: ori ; CHECK-FISL: li 3, -16 Index: test/CodeGen/SystemZ/cond-move-03.ll =================================================================== --- test/CodeGen/SystemZ/cond-move-03.ll +++ test/CodeGen/SystemZ/cond-move-03.ll @@ -20,15 +20,14 @@ ret void } -; FIXME: We should commute the LOCRMux to save one move. 
define void @f2(i32 %limit) { ; CHECK-LABEL: f2: ; CHECK-DAG: stepa [[REG1:%r[0-5]]] ; CHECK-DAG: stepb [[REG2:%r[0-5]]] -; CHECK-DAG: clijhe %r2, 42, -; CHECK: risblg [[REG2]], [[REG1]], 0, 159, 32 -; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32 -; CHECK: stepc [[REG1]] +; CHECK-DAG: clfi %r2, 42 +; CHECK: risbhg %r2, [[REG2]], 0, 159, 32 +; CHECK: locfhrl %r2, [[REG1]] +; CHECK: stepc %r2 ; CHECK: br %r14 %dummy = call i32 asm sideeffect "dummy $0", "=h"() %a = call i32 asm sideeffect "stepa $0", "=h"() @@ -58,15 +57,14 @@ ret void } -; FIXME: We should commute the LOCRMux to save one move. define void @f4(i32 %limit) { ; CHECK-LABEL: f4: ; CHECK-DAG: stepa [[REG1:%r[0-5]]] ; CHECK-DAG: stepb [[REG2:%r[0-5]]] -; CHECK-DAG: clijhe %r2, 42, -; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32 -; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32 -; CHECK: stepc [[REG1]] +; CHECK-DAG: clfi %r2, 42 +; CHECK: risblg [[REG2]], [[REG2]], 0, 159, 32 +; CHECK: locrl [[REG2]], [[REG1]] +; CHECK: stepc [[REG2]] ; CHECK: br %r14 %dummy = call i32 asm sideeffect "dummy $0", "=h"() %a = call i32 asm sideeffect "stepa $0", "=r"()