Index: llvm/trunk/lib/CodeGen/MachineLICM.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/MachineLICM.cpp
+++ llvm/trunk/lib/CodeGen/MachineLICM.cpp
@@ -463,8 +463,12 @@
       for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
         if (PhysRegDefs.test(*AS))
           PhysRegClobbers.set(*AS);
-        PhysRegDefs.set(*AS);
       }
+      // Need a second loop because MCRegAliasIterator can visit the same
+      // register twice.
+      for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
+        PhysRegDefs.set(*AS);
+
       if (PhysRegClobbers.test(Reg))
         // MI defined register is seen defined by another instruction in
         // the loop, it cannot be a LICM candidate.
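The comment in the added code is the heart of the fix: MCRegAliasIterator can yield the same register more than once, so testing and setting PhysRegDefs in a single pass lets an instruction's own def look like a second def of that register, falsely setting PhysRegClobbers and ruling out a valid LICM candidate. Below is a minimal self-contained sketch of the effect; std::bitset and a plain alias list containing a duplicate are illustrative stand-ins for LLVM's BitVector and MCRegAliasIterator, not the real API.

#include <bitset>
#include <cstdio>
#include <vector>

int main() {
  // Stand-in for an alias walk that visits register 5 twice.
  std::vector<unsigned> Aliases = {5, 6, 5};
  std::bitset<32> PhysRegDefs, PhysRegClobbers;

  // Old single-loop logic: the second visit to reg 5 sees the bit set by
  // the first visit and falsely marks reg 5 as clobbered (multiply defined).
  for (unsigned R : Aliases) {
    if (PhysRegDefs.test(R))
      PhysRegClobbers.set(R);
    PhysRegDefs.set(R);
  }
  std::printf("one loop:  clobbers reg 5? %d\n", (int)PhysRegClobbers.test(5));

  // Fixed two-loop logic: test every alias before setting any bit, so a
  // duplicate visit cannot observe this same instruction's own def.
  PhysRegDefs.reset();
  PhysRegClobbers.reset();
  for (unsigned R : Aliases)
    if (PhysRegDefs.test(R))
      PhysRegClobbers.set(R);
  for (unsigned R : Aliases)
    PhysRegDefs.set(R);
  std::printf("two loops: clobbers reg 5? %d\n", (int)PhysRegClobbers.test(5));
  return 0;
}

The sketch prints 1 (falsely clobbered) for the single-loop version and 0 for the two-loop version. The test updates below follow from the same fix: values that were previously treated as redefined inside the loop are now recognized as loop-invariant and hoisted, which changes the emitted branches and instruction placement the tests check for.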
Index: llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -444,7 +444,7 @@
 ; GCN-NEXT: s_xor_b64 exec, exec, [[TEMP_MASK1]]
 ; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]]

-; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop
+; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop{{$}}
 ; GCN: ;;#ASMSTART
 ; GCN: v_nop_e64
 ; GCN: v_nop_e64
@@ -453,7 +453,7 @@
 ; GCN: v_nop_e64
 ; GCN: v_nop_e64
 ; GCN: ;;#ASMEND
-; GCN: s_cbranch_execz [[RET]]
+; GCN: s_cbranch_vccz [[RET]]

 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
 ; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
Index: llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll
@@ -32,11 +32,11 @@
 ; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]

 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
-; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop
 ; SI: s_and_b64 vcc, exec, -1
+; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop
 ; SI: s_waitcnt lgkmcnt(0)
 ; SI: buffer_store_dword [[REG]]
-; SI: s_cbranch_execnz [[LOOP]]
+; SI: s_cbranch_vccnz [[LOOP]]

 ; SI: [[RET]]: ; %UnifiedReturnBlock
 ; SI: s_endpgm
Index: llvm/trunk/test/CodeGen/X86/atomic_mi.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/atomic_mi.ll
+++ llvm/trunk/test/CodeGen/X86/atomic_mi.ll
@@ -93,11 +93,11 @@
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: movl $42, %ebx
 ; X32-NEXT: .p2align 4, 0x90
 ; X32-NEXT: .LBB3_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: movl $42, %ebx
 ; X32-NEXT: lock cmpxchg8b (%esi)
 ; X32-NEXT: jne .LBB3_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
@@ -132,11 +132,11 @@
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
+; X32-NEXT: movl $23, %ecx
+; X32-NEXT: movl $1215752192, %ebx # imm = 0x4876E800
 ; X32-NEXT: .p2align 4, 0x90
 ; X32-NEXT: .LBB4_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: movl $23, %ecx
-; X32-NEXT: movl $1215752192, %ebx # imm = 0x4876E800
 ; X32-NEXT: lock cmpxchg8b (%esi)
 ; X32-NEXT: jne .LBB4_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
@@ -753,10 +753,10 @@
 ; X32-NEXT: andl $2, %ebx
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
+; X32-NEXT: xorl %ecx, %ecx
 ; X32-NEXT: .p2align 4, 0x90
 ; X32-NEXT: .LBB31_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: xorl %ecx, %ecx
 ; X32-NEXT: lock cmpxchg8b (%esi)
 ; X32-NEXT: jne .LBB31_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
Index: llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -126,7 +126,7 @@
 for.body: ; preds = %entry, %for.body
   %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
   %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
   %add = add nsw i32 %call, %sum.04
   %inc = add nuw nsw i32 %i.05, 1
   %exitcond = icmp eq i32 %inc, 10
@@ -178,7 +178,7 @@
 for.body: ; preds = %for.body, %entry
   %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
   %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
   %add = add nsw i32 %call, %sum.03
   %inc = add nuw nsw i32 %i.04, 1
   %exitcond = icmp eq i32 %inc, 10
@@ -248,7 +248,7 @@
 for.body: ; preds = %entry, %for.body
   %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
   %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
   %add = add nsw i32 %call, %sum.04
   %inc = add nuw nsw i32 %i.05, 1
   %exitcond = icmp eq i32 %inc, 10
@@ -324,7 +324,7 @@
 for.body: ; preds = %for.body, %if.then
   %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
   %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
   %add = add nsw i32 %call, %sum.04
   %inc = add nuw nsw i32 %i.05, 1
   %exitcond = icmp eq i32 %inc, 10
Index: llvm/trunk/test/CodeGen/X86/x86-win64-shrink-wrapping.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/x86-win64-shrink-wrapping.ll
+++ llvm/trunk/test/CodeGen/X86/x86-win64-shrink-wrapping.ll
@@ -100,7 +100,7 @@
 for.body: ; preds = %for.body, %for.preheader
   %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
   %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
   %add = add nsw i32 %call, %sum.04
   %inc = add nuw nsw i32 %i.05, 1
   %exitcond = icmp eq i32 %inc, 10
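A note on the test updates, inferred from the diff rather than stated in it: the `sideeffect` keyword added to the inline-asm calls above keeps otherwise loop-invariant asm inside the loop. With PhysRegClobbers no longer falsely set, MachineLICM would be free to hoist the plain `asm "movl $$1, $0"` to the preheader, and the shrink-wrapping tests would stop exercising the in-loop %ebx clobber they were written to check. The AMDGPU changes reflect the same hoisting: the `s_and_b64 vcc, exec, -1` now lands ahead of the loop header, so the loop branches test vcc (`s_cbranch_vccz`/`s_cbranch_vccnz`) instead of exec.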