Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -27362,53 +27362,6 @@
 }
 
 MachineBasicBlock *
-X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI,
-                                       MachineBasicBlock *BB) const {
-  // Combine the following atomic floating-point modification pattern:
-  //   a.store(reg OP a.load(acquire), release)
-  // Transform them into:
-  //   OPss (%gpr), %xmm
-  //   movss %xmm, (%gpr)
-  // Or sd equivalent for 64-bit operations.
-  unsigned MOp, FOp;
-  switch (MI.getOpcode()) {
-  default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
-  case X86::RELEASE_FADD32mr:
-    FOp = X86::ADDSSrm;
-    MOp = X86::MOVSSmr;
-    break;
-  case X86::RELEASE_FADD64mr:
-    FOp = X86::ADDSDrm;
-    MOp = X86::MOVSDmr;
-    break;
-  }
-  const X86InstrInfo *TII = Subtarget.getInstrInfo();
-  DebugLoc DL = MI.getDebugLoc();
-  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-  unsigned ValOpIdx = X86::AddrNumOperands;
-  unsigned VSrc = MI.getOperand(ValOpIdx).getReg();
-  MachineInstrBuilder MIB =
-      BuildMI(*BB, MI, DL, TII->get(FOp),
-              MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
-          .addReg(VSrc);
-  for (int i = 0; i < X86::AddrNumOperands; ++i) {
-    MachineOperand &Operand = MI.getOperand(i);
-    // Clear any kill flags on register operands as we'll create a second
-    // instruction using the same address operands.
-    if (Operand.isReg())
-      Operand.setIsKill(false);
-    MIB.add(Operand);
-  }
-  MachineInstr *FOpMI = MIB;
-  MIB = BuildMI(*BB, MI, DL, TII->get(MOp));
-  for (int i = 0; i < X86::AddrNumOperands; ++i)
-    MIB.add(MI.getOperand(i));
-  MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill);
-  MI.eraseFromParent(); // The pseudo instruction is gone now.
-  return BB;
-}
-
-MachineBasicBlock *
 X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
                                         MachineBasicBlock *BB) const {
   MachineFunction *MF = BB->getParent();
@@ -28631,10 +28584,6 @@
     return BB;
   }
 
-  case X86::RELEASE_FADD32mr:
-  case X86::RELEASE_FADD64mr:
-    return EmitLoweredAtomicFP(MI, BB);
-
   case X86::FP32_TO_INT16_IN_MEM:
   case X86::FP32_TO_INT32_IN_MEM:
   case X86::FP32_TO_INT64_IN_MEM:
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -887,154 +887,98 @@
  * extremely late to prevent them from being accidentally reordered in the backend
  * (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
  */
-multiclass RELEASE_BINOP_MI<SDNode op> {
-  def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
-                   "#BINOP "#NAME#"8mi PSEUDO!",
-                   [(atomic_store_8 addr:$dst, (op
-                      (atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
-  def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src),
-                   "#BINOP "#NAME#"8mr PSEUDO!",
-                   [(atomic_store_8 addr:$dst, (op
-                      (atomic_load_8 addr:$dst), GR8:$src))]>;
-  def NAME#16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
-                    "#BINOP "#NAME#"16mi PSEUDO!",
-                    [(atomic_store_16 addr:$dst, (op
-                       (atomic_load_16 addr:$dst), (i16 imm:$src)))]>;
-  def NAME#16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
-                    "#BINOP "#NAME#"16mr PSEUDO!",
-                    [(atomic_store_16 addr:$dst, (op
-                       (atomic_load_16 addr:$dst), GR16:$src))]>;
-  def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
-                    "#BINOP "#NAME#"32mi PSEUDO!",
-                    [(atomic_store_32 addr:$dst, (op
-                       (atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
-  def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
-                    "#BINOP "#NAME#"32mr PSEUDO!",
-                    [(atomic_store_32 addr:$dst, (op
-                       (atomic_load_32 addr:$dst), GR32:$src))]>;
-  def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                      "#BINOP "#NAME#"64mi32 PSEUDO!",
-                      [(atomic_store_64 addr:$dst, (op
-                         (atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
-  def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
-                    "#BINOP "#NAME#"64mr PSEUDO!",
-                    [(atomic_store_64 addr:$dst, (op
-                       (atomic_load_64 addr:$dst), GR64:$src))]>;
+multiclass RELEASE_BINOP_MI<string Name, SDNode op> {
+  def : Pat<(atomic_store_8 addr:$dst,
+             (op (atomic_load_8 addr:$dst), (i8 imm:$src))),
+            (!cast<Instruction>(Name#"8mi") addr:$dst, imm:$src)>;
+  def : Pat<(atomic_store_16 addr:$dst,
+             (op (atomic_load_16 addr:$dst), (i16 imm:$src))),
+            (!cast<Instruction>(Name#"16mi") addr:$dst, imm:$src)>;
+  def : Pat<(atomic_store_32 addr:$dst,
+             (op (atomic_load_32 addr:$dst), (i32 imm:$src))),
+            (!cast<Instruction>(Name#"32mi") addr:$dst, imm:$src)>;
+  def : Pat<(atomic_store_64 addr:$dst,
+             (op (atomic_load_64 addr:$dst), (i64immSExt32:$src))),
+            (!cast<Instruction>(Name#"64mi32") addr:$dst, (i64immSExt32:$src))>;
+
+  def : Pat<(atomic_store_8 addr:$dst,
+             (op (atomic_load_8 addr:$dst), (i8 GR8:$src))),
+            (!cast<Instruction>(Name#"8mr") addr:$dst, GR8:$src)>;
+  def : Pat<(atomic_store_16 addr:$dst,
+             (op (atomic_load_16 addr:$dst), (i16 GR16:$src))),
+            (!cast<Instruction>(Name#"16mr") addr:$dst, GR16:$src)>;
+  def : Pat<(atomic_store_32 addr:$dst,
+             (op (atomic_load_32 addr:$dst), (i32 GR32:$src))),
+            (!cast<Instruction>(Name#"32mr") addr:$dst, GR32:$src)>;
+  def : Pat<(atomic_store_64 addr:$dst,
+             (op (atomic_load_64 addr:$dst), (i64 GR64:$src))),
+            (!cast<Instruction>(Name#"64mr") addr:$dst, GR64:$src)>;
 }
-let Defs = [EFLAGS], SchedRW = [WriteMicrocoded] in {
-  defm RELEASE_ADD : RELEASE_BINOP_MI<add>;
-  defm RELEASE_AND : RELEASE_BINOP_MI<and>;
-  defm RELEASE_OR : RELEASE_BINOP_MI<or>;
-  defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;
-  // Note: we don't deal with sub, because substractions of constants are
-  // optimized into additions before this code can run.
+defm : RELEASE_BINOP_MI<"ADD", add>;
+defm : RELEASE_BINOP_MI<"AND", and>;
+defm : RELEASE_BINOP_MI<"OR", or>;
+defm : RELEASE_BINOP_MI<"XOR", xor>;
+// Note: we don't deal with sub, because substractions of constants are
+// optimized into additions before this code can run.
+
+multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32, dag dag64> {
+  def : Pat<(atomic_store_8 addr:$dst, dag8),
+            (!cast<Instruction>(Name#8m) addr:$dst)>;
+  def : Pat<(atomic_store_16 addr:$dst, dag16),
+            (!cast<Instruction>(Name#16m) addr:$dst)>;
+  def : Pat<(atomic_store_32 addr:$dst, dag32),
+            (!cast<Instruction>(Name#32m) addr:$dst)>;
+  def : Pat<(atomic_store_64 addr:$dst, dag64),
+            (!cast<Instruction>(Name#64m) addr:$dst)>;
 }
-// Same as above, but for floating-point.
-// FIXME: imm version.
-// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.
-// FIXME: This could also handle SIMD operations with *ps and *pd instructions.
-let usesCustomInserter = 1, SchedRW = [WriteMicrocoded] in {
-multiclass RELEASE_FP_BINOP_MI<SDNode op> {
-  def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),
-                    "#BINOP "#NAME#"32mr PSEUDO!",
-                    [(atomic_store_32 addr:$dst,
-                       (i32 (bitconvert (op
-                         (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),
-                         FR32:$src))))]>, Requires<[HasSSE1]>;
-  def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),
-                    "#BINOP "#NAME#"64mr PSEUDO!",
-                    [(atomic_store_64 addr:$dst,
-                       (i64 (bitconvert (op
-                         (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),
-                         FR64:$src))))]>, Requires<[HasSSE2]>;
-}
-defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
-// FIXME: Add fsub, fmul, fdiv, ...
-}
-
-multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
-  def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
-                  "#UNOP "#NAME#"8m PSEUDO!",
-                  [(atomic_store_8 addr:$dst, dag8)]>;
-  def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
-                   "#UNOP "#NAME#"16m PSEUDO!",
-                   [(atomic_store_16 addr:$dst, dag16)]>;
-  def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
-                   "#UNOP "#NAME#"32m PSEUDO!",
-                   [(atomic_store_32 addr:$dst, dag32)]>;
-  def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
-                   "#UNOP "#NAME#"64m PSEUDO!",
-                   [(atomic_store_64 addr:$dst, dag64)]>;
-}
-
-let Defs = [EFLAGS], Predicates = [UseIncDec], SchedRW = [WriteMicrocoded] in {
-  defm RELEASE_INC : RELEASE_UNOP<
+let Predicates = [UseIncDec] in {
+  defm : RELEASE_UNOP<"INC",
       (add (atomic_load_8 addr:$dst), (i8 1)),
       (add (atomic_load_16 addr:$dst), (i16 1)),
      (add (atomic_load_32 addr:$dst), (i32 1)),
       (add (atomic_load_64 addr:$dst), (i64 1))>;
-  defm RELEASE_DEC : RELEASE_UNOP<
+  defm : RELEASE_UNOP<"DEC",
       (add (atomic_load_8 addr:$dst), (i8 -1)),
       (add (atomic_load_16 addr:$dst), (i16 -1)),
       (add (atomic_load_32 addr:$dst), (i32 -1)),
       (add (atomic_load_64 addr:$dst), (i64 -1))>;
 }
-let Defs = [EFLAGS] in {
-  defm RELEASE_NEG : RELEASE_UNOP<
-      (ineg (i8 (atomic_load_8 addr:$dst))),
-      (ineg (i16 (atomic_load_16 addr:$dst))),
-      (ineg (i32 (atomic_load_32 addr:$dst))),
-      (ineg (i64 (atomic_load_64 addr:$dst)))>;
-}
-// NOT doesn't set flags.
-defm RELEASE_NOT : RELEASE_UNOP<
+defm : RELEASE_UNOP<"NEG",
+    (ineg (i8 (atomic_load_8 addr:$dst))),
+    (ineg (i16 (atomic_load_16 addr:$dst))),
+    (ineg (i32 (atomic_load_32 addr:$dst))),
+    (ineg (i64 (atomic_load_64 addr:$dst)))>;
+defm : RELEASE_UNOP<"NOT",
     (not (i8 (atomic_load_8 addr:$dst))),
     (not (i16 (atomic_load_16 addr:$dst))),
     (not (i32 (atomic_load_32 addr:$dst))),
     (not (i64 (atomic_load_64 addr:$dst)))>;
-let SchedRW = [WriteMicrocoded] in {
-def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
-                       "#RELEASE_MOV8mi PSEUDO!",
-                       [(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
-def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
-                        "#RELEASE_MOV16mi PSEUDO!",
-                        [(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
-def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
-                        "#RELEASE_MOV32mi PSEUDO!",
-                        [(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
-def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                          "#RELEASE_MOV64mi32 PSEUDO!",
-                          [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
-
-def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
-                       "#RELEASE_MOV8mr PSEUDO!",
-                       [(atomic_store_8 addr:$dst, GR8 :$src)]>;
-def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
-                        "#RELEASE_MOV16mr PSEUDO!",
-                        [(atomic_store_16 addr:$dst, GR16:$src)]>;
-def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
-                        "#RELEASE_MOV32mr PSEUDO!",
-                        [(atomic_store_32 addr:$dst, GR32:$src)]>;
-def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
-                        "#RELEASE_MOV64mr PSEUDO!",
-                        [(atomic_store_64 addr:$dst, GR64:$src)]>;
-
-def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
-                       "#ACQUIRE_MOV8rm PSEUDO!",
-                       [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
-def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
-                        "#ACQUIRE_MOV16rm PSEUDO!",
-                        [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
-def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
-                        "#ACQUIRE_MOV32rm PSEUDO!",
-                        [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
-def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
-                        "#ACQUIRE_MOV64rm PSEUDO!",
-                        [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
-} // SchedRW
+def : Pat<(atomic_store_8 addr:$dst, (i8 imm:$src)),
+          (MOV8mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_16 addr:$dst, (i16 imm:$src)),
+          (MOV16mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_32 addr:$dst, (i32 imm:$src)),
+          (MOV32mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_64 addr:$dst, (i64immSExt32:$src)),
+          (MOV64mi32 addr:$dst, i64immSExt32:$src)>;
+
+def : Pat<(atomic_store_8 addr:$dst, GR8:$src),
+          (MOV8mr addr:$dst, GR8:$src)>;
+def : Pat<(atomic_store_16 addr:$dst, GR16:$src),
+          (MOV16mr addr:$dst, GR16:$src)>;
+def : Pat<(atomic_store_32 addr:$dst, GR32:$src),
+          (MOV32mr addr:$dst, GR32:$src)>;
+def : Pat<(atomic_store_64 addr:$dst, GR64:$src),
+          (MOV64mr addr:$dst, GR64:$src)>;
+
+def : Pat<(i8 (atomic_load_8 addr:$src)), (MOV8rm addr:$src)>;
+def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
+def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
+def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
 
 //===----------------------------------------------------------------------===//
 // DAG Pattern Matching Rules
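For context (not part of the diff): the replacement Pat<> records above match ordinary atomic load / op / atomic store sequences during instruction selection, so no pseudo-instruction or custom inserter is needed any more. A minimal LLVM IR sketch of the kind of input these patterns are aimed at is shown below; the function name and the monotonic/release orderings are illustrative assumptions, not taken from the patch or its tests.

; Illustrative IR only -- hypothetical example.
; The atomic load, the add, and the atomic store back to the same address
; should now select to a single memory-destination ADD via RELEASE_BINOP_MI,
; with no RELEASE_* pseudo involved.
define void @add32_release_sketch(i32* %p, i32 %v) {
  %old = load atomic i32, i32* %p monotonic, align 4
  %new = add i32 %old, %v
  store atomic i32 %new, i32* %p release, align 4
  ret void
}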
Index: lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- lib/Target/X86/X86MCInstLower.cpp
+++ lib/Target/X86/X86MCInstLower.cpp
@@ -584,70 +584,6 @@
   case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
   case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
 
-  // Atomic load and store require a separate pseudo-inst because Acquire
-  // implies mayStore and Release implies mayLoad; fix these to regular MOV
-  // instructions here
-  case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
-  case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
-  case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
-  case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
-  case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
-  case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
-  case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
-  case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
-  case X86::RELEASE_MOV8mi: OutMI.setOpcode(X86::MOV8mi); goto ReSimplify;
-  case X86::RELEASE_MOV16mi: OutMI.setOpcode(X86::MOV16mi); goto ReSimplify;
-  case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
-  case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
-  case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
-  case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify;
-  case X86::RELEASE_ADD16mi: OutMI.setOpcode(X86::ADD16mi); goto ReSimplify;
-  case X86::RELEASE_ADD16mr: OutMI.setOpcode(X86::ADD16mr); goto ReSimplify;
-  case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
-  case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify;
-  case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
-  case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify;
-  case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
-  case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify;
-  case X86::RELEASE_AND16mi: OutMI.setOpcode(X86::AND16mi); goto ReSimplify;
-  case X86::RELEASE_AND16mr: OutMI.setOpcode(X86::AND16mr); goto ReSimplify;
-  case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
-  case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify;
-  case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
-  case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify;
-  case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
-  case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify;
-  case X86::RELEASE_OR16mi: OutMI.setOpcode(X86::OR16mi); goto ReSimplify;
-  case X86::RELEASE_OR16mr: OutMI.setOpcode(X86::OR16mr); goto ReSimplify;
-  case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
-  case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify;
-  case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
-  case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify;
-  case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
-  case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify;
-  case X86::RELEASE_XOR16mi: OutMI.setOpcode(X86::XOR16mi); goto ReSimplify;
-  case X86::RELEASE_XOR16mr: OutMI.setOpcode(X86::XOR16mr); goto ReSimplify;
-  case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
-  case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify;
-  case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
-  case X86::RELEASE_XOR64mr: OutMI.setOpcode(X86::XOR64mr); goto ReSimplify;
-  case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify;
-  case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify;
-  case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify;
-  case X86::RELEASE_INC64m: OutMI.setOpcode(X86::INC64m); goto ReSimplify;
-  case X86::RELEASE_DEC8m: OutMI.setOpcode(X86::DEC8m); goto ReSimplify;
-  case X86::RELEASE_DEC16m: OutMI.setOpcode(X86::DEC16m); goto ReSimplify;
-  case X86::RELEASE_DEC32m: OutMI.setOpcode(X86::DEC32m); goto ReSimplify;
-  case X86::RELEASE_DEC64m: OutMI.setOpcode(X86::DEC64m); goto ReSimplify;
-  case X86::RELEASE_NOT8m: OutMI.setOpcode(X86::NOT8m); goto ReSimplify;
-  case X86::RELEASE_NOT16m: OutMI.setOpcode(X86::NOT16m); goto ReSimplify;
-  case X86::RELEASE_NOT32m: OutMI.setOpcode(X86::NOT32m); goto ReSimplify;
-  case X86::RELEASE_NOT64m: OutMI.setOpcode(X86::NOT64m); goto ReSimplify;
-  case X86::RELEASE_NEG8m: OutMI.setOpcode(X86::NEG8m); goto ReSimplify;
-  case X86::RELEASE_NEG16m: OutMI.setOpcode(X86::NEG16m); goto ReSimplify;
-  case X86::RELEASE_NEG32m: OutMI.setOpcode(X86::NEG32m); goto ReSimplify;
-  case X86::RELEASE_NEG64m: OutMI.setOpcode(X86::NEG64m); goto ReSimplify;
-
   // We don't currently select the correct instruction form for instructions
   // which have a short %eax, etc. form. Handle this by custom lowering, for
   // now.
Index: test/CodeGen/X86/atomic-non-integer.ll
===================================================================
--- test/CodeGen/X86/atomic-non-integer.ll
+++ test/CodeGen/X86/atomic-non-integer.ll
@@ -62,7 +62,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: pushq %rax
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movw (%rdi), %ax
+; CHECK-NEXT: movzwl (%rdi), %eax
 ; CHECK-NEXT: movzwl %ax, %edi
 ; CHECK-NEXT: callq __gnu_h2f_ieee
 ; CHECK-NEXT: popq %rax
Index: test/CodeGen/X86/atomic_idempotent.ll
===================================================================
--- test/CodeGen/X86/atomic_idempotent.ll
+++ test/CodeGen/X86/atomic_idempotent.ll
@@ -28,14 +28,14 @@
 ; X64-LABEL: or16:
 ; X64: # %bb.0:
 ; X64-NEXT: mfence
-; X64-NEXT: movw (%rdi), %ax
+; X64-NEXT: movzwl (%rdi), %eax
 ; X64-NEXT: retq
 ;
 ; X32-LABEL: or16:
 ; X32: # %bb.0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: mfence
-; X32-NEXT: movw (%eax), %ax
+; X32-NEXT: movzwl (%eax), %eax
 ; X32-NEXT: retl
   %1 = atomicrmw or i16* %p, i16 0 acquire
   ret i16 %1
Index: test/CodeGen/X86/atomic_mi.ll
===================================================================
--- test/CodeGen/X86/atomic_mi.ll
+++ test/CodeGen/X86/atomic_mi.ll
@@ -1607,7 +1607,7 @@
 ; treat 16 bit arithmetic as expensive on X86/X86_64.
 ; X64-LABEL: neg_16:
 ; X64: # %bb.0:
-; X64-NEXT: movw (%rdi), %ax
+; X64-NEXT: movzwl (%rdi), %eax
 ; X64-NEXT: negl %eax
 ; X64-NEXT: movw %ax, (%rdi)
 ; X64-NEXT: retq
@@ -1615,7 +1615,7 @@
 ; X32-LABEL: neg_16:
 ; X32: # %bb.0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
+; X32-NEXT: movzwl (%eax), %ecx
 ; X32-NEXT: negl %ecx
 ; X32-NEXT: movw %cx, (%eax)
 ; X32-NEXT: retl
@@ -1718,8 +1718,11 @@
 define void @fadd_32r(float* %loc, float %val) {
 ; X64-LABEL: fadd_32r:
 ; X64: # %bb.0:
-; X64-NEXT: addss (%rdi), %xmm0
-; X64-NEXT: movss %xmm0, (%rdi)
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movd %eax, %xmm1
+; X64-NEXT: addss %xmm0, %xmm1
+; X64-NEXT: movd %xmm1, %eax
+; X64-NEXT: movl %eax, (%rdi)
 ; X64-NEXT: retq
 ;
 ; X32-LABEL: fadd_32r:
@@ -1751,8 +1754,11 @@
 define void @fadd_64r(double* %loc, double %val) {
 ; X64-LABEL: fadd_64r:
 ; X64: # %bb.0:
-; X64-NEXT: addsd (%rdi), %xmm0
-; X64-NEXT: movsd %xmm0, (%rdi)
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %xmm1
+; X64-NEXT: addsd %xmm0, %xmm1
+; X64-NEXT: movq %xmm1, %rax
+; X64-NEXT: movq %rax, (%rdi,)
 ; X64-NEXT: retq
 ;
 ; X32-LABEL: fadd_64r:
@@ -1812,9 +1818,11 @@
 define void @fadd_32g() {
 ; X64-LABEL: fadd_32g:
 ; X64: # %bb.0:
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: movl {{.*}}(%rip), %eax
+; X64-NEXT: movd %eax, %xmm0
 ; X64-NEXT: addss {{.*}}(%rip), %xmm0
-; X64-NEXT: movss %xmm0, {{.*}}(%rip)
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: movl %eax, {{.*}}(%rip)
 ; X64-NEXT: retq
 ;
 ; X32-LABEL: fadd_32g:
@@ -1843,9 +1851,11 @@
 define void @fadd_64g() {
 ; X64-LABEL: fadd_64g:
 ; X64: # %bb.0:
-; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: movq {{.*}}(%rip), %rax
+; X64-NEXT: movq %rax, %xmm0
 ; X64-NEXT: addsd {{.*}}(%rip), %xmm0
-; X64-NEXT: movsd %xmm0, {{.*}}(%rip)
+; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: movq %rax, {{.*}}(%rip)
 ; X64-NEXT: retq
 ;
 ; X32-LABEL: fadd_64g:
@@ -1898,9 +1908,11 @@
 ; X64-LABEL: fadd_32imm:
 ; X64: # %bb.0:
 ; X64-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: addss (%rax), %xmm0
-; X64-NEXT: movss %xmm0, (%rax)
+; X64-NEXT: movl (%rax), %ecx
+; X64-NEXT: movd %ecx, %xmm0
+; X64-NEXT: addss {{.*}}(%rip), %xmm0
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: movl %ecx, (%rax)
 ; X64-NEXT: retq
 ;
 ; X32-LABEL: fadd_32imm:
@@ -1930,9 +1942,11 @@
 ; X64-LABEL: fadd_64imm:
 ; X64: # %bb.0:
 ; X64-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: addsd (%rax), %xmm0
-; X64-NEXT: movsd %xmm0, (%rax)
+; X64-NEXT: movq (%rax), %rcx
+; X64-NEXT: movq %rcx, %xmm0
+; X64-NEXT: addsd {{.*}}(%rip), %xmm0
+; X64-NEXT: movq %xmm0, %rcx
+; X64-NEXT: movq %rcx, (%rax)
 ; X64-NEXT: retq
 ;
 ; X32-LABEL: fadd_64imm:
@@ -1984,9 +1998,11 @@
 define void @fadd_32stack() {
 ; X64-LABEL: fadd_32stack:
 ; X64: # %bb.0:
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movd %eax, %xmm0
+; X64-NEXT: addss {{.*}}(%rip), %xmm0
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
 ;
 ; X32-LABEL: fadd_32stack:
@@ -2017,9 +2033,11 @@
 define void @fadd_64stack() {
 ; X64-LABEL: fadd_64stack:
 ; X64: # %bb.0:
-; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: addsd {{.*}}(%rip), %xmm0
+; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
 ;
 ; X32-LABEL: fadd_64stack:
@@ -2072,8 +2090,11 @@
 define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) {
 ; X64-LABEL: fadd_array:
 ; X64: # %bb.0: # %bb
-; X64-NEXT: addsd (%rdi,%rsi,8), %xmm0
-; X64-NEXT: movsd %xmm0, (%rdi,%rsi,8)
+; X64-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-NEXT: movq %rax, %xmm1
+; X64-NEXT: addsd %xmm0, %xmm1
+; X64-NEXT: movq %xmm1, %rax
+; X64-NEXT: movq %rax, (%rdi,%rsi,8)
 ; X64-NEXT: retq
 ;
 ; X32-LABEL: fadd_array:
Index: test/CodeGen/X86/conditional-tailcall-samedest.mir
===================================================================
--- test/CodeGen/X86/conditional-tailcall-samedest.mir
+++ test/CodeGen/X86/conditional-tailcall-samedest.mir
@@ -118,7 +118,7 @@
   bb.2.sw.bb:
     successors: %bb.3(0x00000800), %bb.6(0x7ffff800)
 
-    $al = ACQUIRE_MOV8rm $rip, 1, $noreg, @static_local_guard, $noreg :: (volatile load acquire 1 from `i8* bitcast (i64* @static_local_guard to i8*)`, align 8)
+    $al = MOV8rm $rip, 1, $noreg, @static_local_guard, $noreg :: (volatile load acquire 1 from `i8* bitcast (i64* @static_local_guard to i8*)`, align 8)
     TEST8rr killed $al, $al, implicit-def $eflags
     JNE_1 %bb.6, implicit killed $eflags
     JMP_1 %bb.3
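For context (again not part of the diff): the fadd_* tests above store float/double values through integer bitcasts, roughly as in the sketch below. The function name and the monotonic/release orderings are assumptions for illustration; the real test bodies live in test/CodeGen/X86/atomic_mi.ll.

; Illustrative IR only.
define void @fadd_32r_sketch(float* %loc, float %val) {
  %p = bitcast float* %loc to i32*
  %bits = load atomic i32, i32* %p monotonic, align 4
  %f = bitcast i32 %bits to float
  %sum = fadd float %f, %val
  %res = bitcast float %sum to i32
  store atomic i32 %res, i32* %p release, align 4
  ret void
}
; With RELEASE_FADD* and EmitLoweredAtomicFP removed, this sequence is now
; selected through the plain integer MOV patterns, which is why the X64
; checks above gained movl/movd (movq for the f64 cases) around addss/addsd.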