Index: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -160,11 +160,6 @@ // Thumb Addressing Modes: bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); - bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset, - unsigned Scale); - bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset); - bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset); - bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset); bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, SDValue &OffImm); bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, @@ -1086,77 +1081,13 @@ } bool -ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base, - SDValue &Offset, unsigned Scale) { - if (Scale == 4) { - SDValue TmpBase, TmpOffImm; - if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) - return false; // We want to select tLDRspi / tSTRspi instead. - - if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() == ISD::TargetConstantPool) - return false; // We want to select tLDRpci instead. - } - - if (!CurDAG->isBaseWithConstantOffset(N)) - return false; - - // Thumb does not have [sp, r] address mode. - RegisterSDNode *LHSR = dyn_cast(N.getOperand(0)); - RegisterSDNode *RHSR = dyn_cast(N.getOperand(1)); - if ((LHSR && LHSR->getReg() == ARM::SP) || - (RHSR && RHSR->getReg() == ARM::SP)) - return false; - - // FIXME: Why do we explicitly check for a match here and then return false? - // Presumably to allow something else to match, but shouldn't this be - // documented? - int RHSC; - if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) - return false; - - Base = N.getOperand(0); - Offset = N.getOperand(1); - return true; -} - -bool -ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N, - SDValue &Base, - SDValue &Offset) { - return SelectThumbAddrModeRI(N, Base, Offset, 1); -} - -bool -ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N, - SDValue &Base, - SDValue &Offset) { - return SelectThumbAddrModeRI(N, Base, Offset, 2); -} - -bool -ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N, - SDValue &Base, - SDValue &Offset) { - return SelectThumbAddrModeRI(N, Base, Offset, 4); -} - -bool ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, SDValue &OffImm) { - if (Scale == 4) { - SDValue TmpBase, TmpOffImm; - if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) - return false; // We want to select tLDRspi / tSTRspi instead. - - if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() == ISD::TargetConstantPool) - return false; // We want to select tLDRpci instead. - } - if (!CurDAG->isBaseWithConstantOffset(N)) { - if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + if (N.getOpcode() == ISD::ADD) { + return false; // We want to select register offset instead + } else if (N.getOpcode() == ARMISD::Wrapper && + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); } else { Base = N; @@ -1166,23 +1097,6 @@ return true; } - RegisterSDNode *LHSR = dyn_cast(N.getOperand(0)); - RegisterSDNode *RHSR = dyn_cast(N.getOperand(1)); - if ((LHSR && LHSR->getReg() == ARM::SP) || - (RHSR && RHSR->getReg() == ARM::SP)) { - ConstantSDNode *LHS = dyn_cast(N.getOperand(0)); - ConstantSDNode *RHS = dyn_cast(N.getOperand(1)); - unsigned LHSC = LHS ? LHS->getZExtValue() : 0; - unsigned RHSC = RHS ? RHS->getZExtValue() : 0; - - // Thumb does not have [sp, #imm5] address mode for non-zero imm5. - if (LHSC != 0 || RHSC != 0) return false; - - Base = N; - OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); - return true; - } - // If the RHS is + imm5 * scale, fold into addr mode. int RHSC; if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { @@ -1191,9 +1105,8 @@ return true; } - Base = N.getOperand(0); - OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); - return true; + // Offset is too large, so use register offset instead. + return false; } bool Index: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td @@ -591,6 +591,34 @@ // Load Store Instructions. // +// PC-relative loads need to be matched first as constant pool accesses need to +// always be PC-relative. We do this using AddedComplexity, as the pattern is +// simpler than the patterns of the other load instructions. +let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in +def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, + T1Encoding<{0,1,0,0,1,?}> { + // A6.2 & A8.6.59 + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + +// SP-relative loads should be matched before standard immediate-offset loads as +// it means we avoid having to move SP to another register. +let canFoldAsLoad = 1 in +def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>, + T1LdStSP<{1,?,?}> { + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + // Loads: reg/reg and reg/imm5 let canFoldAsLoad = 1, isReMaterializable = 1 in multiclass thumb_ld_rr_ri_enc reg_opc, bits<4> imm_opc, @@ -598,16 +626,20 @@ AddrMode am, InstrItinClass itin_r, InstrItinClass itin_i, string asm, PatFrag opnode> { - def r : // reg/reg - T1pILdStEncode; + // Immediate-offset loads should be matched before register-offset loads as + // when the offset is a constant it's simpler to first check if it fits in the + // immediate offset field then fall back to register-offset if it doesn't. def i : // reg/imm5 T1pILdStEncodeImm; + // Register-offset loads are matched last. + def r : // reg/reg + T1pILdStEncode; } // Stores: reg/reg and reg/imm5 multiclass thumb_st_rr_ri_enc reg_opc, bits<4> imm_opc, @@ -615,32 +647,32 @@ AddrMode am, InstrItinClass itin_r, InstrItinClass itin_i, string asm, PatFrag opnode> { - def r : // reg/reg - T1pILdStEncode; def i : // reg/imm5 T1pILdStEncodeImm; + def r : // reg/reg + T1pILdStEncode; } // A8.6.57 & A8.6.60 -defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4, +defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iLoad_r, IIC_iLoad_i, "ldr", UnOpFrag<(load node:$Src)>>; // A8.6.64 & A8.6.61 -defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1, +defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; // A8.6.76 & A8.6.73 -defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2, +defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; @@ -659,58 +691,36 @@ "ldrsh", "\t$Rt, $addr", [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>; -let canFoldAsLoad = 1 in -def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, - "ldr", "\t$Rt, $addr", - [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>, - T1LdStSP<{1,?,?}> { - bits<3> Rt; - bits<8> addr; - let Inst{10-8} = Rt; - let Inst{7-0} = addr; -} -let canFoldAsLoad = 1, isReMaterializable = 1 in -def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, - "ldr", "\t$Rt, $addr", - [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, - T1Encoding<{0,1,0,0,1,?}> { - // A6.2 & A8.6.59 +def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, + "str", "\t$Rt, $addr", + [(store tGPR:$Rt, t_addrmode_sp:$addr)]>, + T1LdStSP<{0,?,?}> { bits<3> Rt; bits<8> addr; let Inst{10-8} = Rt; - let Inst{7-0} = addr; + let Inst{7-0} = addr; } // A8.6.194 & A8.6.192 -defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4, +defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iStore_r, IIC_iStore_i, "str", BinOpFrag<(store node:$LHS, node:$RHS)>>; // A8.6.197 & A8.6.195 -defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1, +defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iStore_bh_r, IIC_iStore_bh_i, "strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; // A8.6.207 & A8.6.205 -defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2, +defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iStore_bh_r, IIC_iStore_bh_i, "strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; -def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, - "str", "\t$Rt, $addr", - [(store tGPR:$Rt, t_addrmode_sp:$addr)]>, - T1LdStSP<{0,?,?}> { - bits<3> Rt; - bits<8> addr; - let Inst{10-8} = Rt; - let Inst{7-0} = addr; -} - //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -1328,16 +1338,16 @@ (tSUBrr tGPR:$lhs, tGPR:$rhs)>; // Bswap 16 with load/store -def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)), - (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>; def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)), (tREV16 (tLDRHi t_addrmode_is2:$addr))>; -def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), - t_addrmode_rrs2:$addr), - (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>; +def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)), + (tREV16 (tLDRHr t_addrmode_rr:$addr))>; def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), t_addrmode_is2:$addr), (tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>; +def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), + t_addrmode_rr:$addr), + (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rr:$addr)>; // ConstantPool def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; @@ -1372,10 +1382,10 @@ Requires<[IsThumb, HasV5T]>; // zextload i1 -> zextload i8 -def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr), - (tLDRBr t_addrmode_rrs1:$addr)>; def : T1Pat<(zextloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; +def : T1Pat<(zextloadi1 t_addrmode_rr:$addr), + (tLDRBr t_addrmode_rr:$addr)>; // extload from the stack -> word load from the stack, as it avoids having to // materialize the base in a separate register. This only works when a word @@ -1389,61 +1399,61 @@ Requires<[IsThumb, IsThumb1Only, IsLE]>; // extload -> zextload -def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>; -def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; -def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>; -def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; -def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>; -def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>; +def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; +def : T1Pat<(extloadi1 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>; +def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; +def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>; +def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>; +def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>; // If it's impossible to use [r,r] address mode for sextload, select to // ldr{b|h} + sxt{b|h} instead. def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), (tSXTB (tLDRBi t_addrmode_is1:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr), - (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>, +def : T1Pat<(sextloadi8 t_addrmode_rr:$addr), + (tSXTB (tLDRBr t_addrmode_rr:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), (tSXTH (tLDRHi t_addrmode_is2:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), - (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>, +def : T1Pat<(sextloadi16 t_addrmode_rr:$addr), + (tSXTH (tLDRHr t_addrmode_rr:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr), - (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>; def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>; -def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), - (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>; +def : T1Pat<(sextloadi8 t_addrmode_rr:$addr), + (tASRri (tLSLri (tLDRBr t_addrmode_rr:$addr), 24), 24)>; def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>; +def : T1Pat<(sextloadi16 t_addrmode_rr:$addr), + (tASRri (tLSLri (tLDRHr t_addrmode_rr:$addr), 16), 16)>; def : T1Pat<(atomic_load_8 t_addrmode_is1:$src), (tLDRBi t_addrmode_is1:$src)>; -def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src), - (tLDRBr t_addrmode_rrs1:$src)>; +def : T1Pat<(atomic_load_8 t_addrmode_rr:$src), + (tLDRBr t_addrmode_rr:$src)>; def : T1Pat<(atomic_load_16 t_addrmode_is2:$src), (tLDRHi t_addrmode_is2:$src)>; -def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src), - (tLDRHr t_addrmode_rrs2:$src)>; +def : T1Pat<(atomic_load_16 t_addrmode_rr:$src), + (tLDRHr t_addrmode_rr:$src)>; def : T1Pat<(atomic_load_32 t_addrmode_is4:$src), (tLDRi t_addrmode_is4:$src)>; -def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src), - (tLDRr t_addrmode_rrs4:$src)>; +def : T1Pat<(atomic_load_32 t_addrmode_rr:$src), + (tLDRr t_addrmode_rr:$src)>; def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val), (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>; -def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val), - (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>; +def : T1Pat<(atomic_store_8 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRBr tGPR:$val, t_addrmode_rr:$ptr)>; def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val), (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>; -def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val), - (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>; +def : T1Pat<(atomic_store_16 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRHr tGPR:$val, t_addrmode_rr:$ptr)>; def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val), (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>; -def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val), - (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>; +def : T1Pat<(atomic_store_32 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRr tGPR:$val, t_addrmode_rr:$ptr)>; // Large immediate handling. Index: llvm/trunk/test/CodeGen/ARM/load.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/load.ll +++ llvm/trunk/test/CodeGen/ARM/load.ll @@ -1,35 +1,564 @@ -; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T1 +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T2 -define i32 @f1(i8* %p) { + +; Register offset + +; CHECK-LABEL: ldrsb_rr +; CHECK: ldrsb r0, [r0, r1] +define i32 @ldrsb_rr(i8* %p, i32 %n) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n + %0 = load i8, i8* %arrayidx, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_rr +; CHECK-T1: lsls r1, r1, #1 +; CHECK-T1: ldrsh r0, [r0, r1] +; CHECK-T2: ldrsh.w r0, [r0, r1, lsl #1] +define i32 @ldrsh_rr(i16* %p, i32 %n) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i32 %n + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrb_rr +; CHECK: ldrb r0, [r0, r1] +define i32 @ldrb_rr(i8* %p, i32 %n) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_rr +; CHECK-T1: lsls r1, r1, #1 +; CHECK-T1: ldrh r0, [r0, r1] +; CHECK-T2: ldrh.w r0, [r0, r1, lsl #1] +define i32 @ldrh_rr(i16* %p, i32 %n) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i32 %n + %0 = load i16, i16* %arrayidx, align 2 + %conv = zext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldr_rr +; CHECK-T1: lsls r1, r1, #2 +; CHECK-T1: ldr r0, [r0, r1] +; CHECK-T2: ldr.w r0, [r0, r1, lsl #2] +define i32 @ldr_rr(i32* %p, i32 %n) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %p, i32 %n + %0 = load i32, i32* %arrayidx, align 4 + ret i32 %0 +} + +; CHECK-LABEL: strb_rr +; CHECK: strb r2, [r0, r1] +define void @strb_rr(i8* %p, i32 %n, i32 %x) { +entry: + %conv = trunc i32 %x to i8 + %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n + store i8 %conv, i8* %arrayidx, align 1 + ret void +} + +; CHECK-LABEL: strh_rr +; CHECK-T1: lsls r1, r1, #1 +; CHECK-T1: strh r2, [r0, r1] +; CHECK-T2: strh.w r2, [r0, r1, lsl #1] +define void @strh_rr(i16* %p, i32 %n, i32 %x) { +entry: + %conv = trunc i32 %x to i16 + %arrayidx = getelementptr inbounds i16, i16* %p, i32 %n + store i16 %conv, i16* %arrayidx, align 2 + ret void +} + +; CHECK-LABEL: str_rr +; CHECK-T1: lsls r1, r1, #2 +; CHECK-T1: str r2, [r0, r1] +; CHECK-T2: str.w r2, [r0, r1, lsl #2] +define void @str_rr(i32* %p, i32 %n, i32 %x) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %p, i32 %n + store i32 %x, i32* %arrayidx, align 4 + ret void +} + + +; Immediate offset of zero + +; CHECK-LABEL: ldrsb_ri_zero +; CHECK-T1: ldrb r0, [r0] +; CHECK-T1: sxtb r0, r0 +; CHECK-T2: ldrsb.w r0, [r0] +define i32 @ldrsb_ri_zero(i8* %p) { +entry: + %0 = load i8, i8* %p, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_ri_zero +; CHECK-T1: ldrh r0, [r0] +; CHECK-T1: sxth r0, r0 +; CHECK-T2: ldrsh.w r0, [r0] +define i32 @ldrsh_ri_zero(i16* %p) { +entry: + %0 = load i16, i16* %p, align 2 + %conv = sext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrb_ri_zero +; CHECK: ldrb r0, [r0] +define i32 @ldrb_ri_zero(i8* %p) { +entry: + %0 = load i8, i8* %p, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_ri_zero +; CHECK: ldrh r0, [r0] +define i32 @ldrh_ri_zero(i16* %p) { +entry: + %0 = load i16, i16* %p, align 2 + %conv = zext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldr_ri_zero +; CHECK: ldr r0, [r0] +define i32 @ldr_ri_zero(i32* %p) { +entry: + %0 = load i32, i32* %p, align 4 + ret i32 %0 +} + +; CHECK-LABEL: strb_ri_zero +; CHECK: strb r1, [r0] +define void @strb_ri_zero(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i8 + store i8 %conv, i8* %p, align 1 + ret void +} + +; CHECK-LABEL: strh_ri_zero +; CHECK: strh r1, [r0] +define void @strh_ri_zero(i16* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i16 + store i16 %conv, i16* %p, align 2 + ret void +} + +; CHECK-LABEL: str_ri_zero +; CHECK: str r1, [r0] +define void @str_ri_zero(i32* %p, i32 %x) { +entry: + store i32 %x, i32* %p, align 4 + ret void +} + + +; Maximum Thumb-1 immediate offset + +; CHECK-LABEL: ldrsb_ri_t1_max +; CHECK-T1: movs r1, #31 +; CHECK-T1: ldrsb r0, [r0, r1] +; CHECK-T2: ldrsb.w r0, [r0, #31] +define i32 @ldrsb_ri_t1_max(i8* %p) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 31 + %0 = load i8, i8* %arrayidx, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_ri_t1_max +; CHECK-T1: movs r1, #62 +; CHECK-T1: ldrsh r0, [r0, r1] +; CHECK-T2: ldrsh.w r0, [r0, #62] +define i32 @ldrsh_ri_t1_max(i16* %p) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i32 31 + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrb_ri_t1_max +; CHECK: ldrb r0, [r0, #31] +define i32 @ldrb_ri_t1_max(i8* %p) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 31 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_ri_t1_max +; CHECK: ldrh r0, [r0, #62] +define i32 @ldrh_ri_t1_max(i16* %p) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i32 31 + %0 = load i16, i16* %arrayidx, align 2 + %conv = zext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldr_ri_t1_max +; CHECK: ldr r0, [r0, #124] +define i32 @ldr_ri_t1_max(i32* %p) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %p, i32 31 + %0 = load i32, i32* %arrayidx, align 4 + ret i32 %0 +} + +; CHECK-LABEL: strb_ri_t1_max +; CHECK: strb r1, [r0, #31] +define void @strb_ri_t1_max(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i8 + %arrayidx = getelementptr inbounds i8, i8* %p, i32 31 + store i8 %conv, i8* %arrayidx, align 1 + ret void +} + +; CHECK-LABEL: strh_ri_t1_max +; CHECK: strh r1, [r0, #62] +define void @strh_ri_t1_max(i16* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i16 + %arrayidx = getelementptr inbounds i16, i16* %p, i32 31 + store i16 %conv, i16* %arrayidx, align 2 + ret void +} + +; CHECK-LABEL: str_ri_t1_max +; CHECK: str r1, [r0, #124] +define void @str_ri_t1_max(i32* %p, i32 %x) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %p, i32 31 + store i32 %x, i32* %arrayidx, align 4 + ret void +} + + +; One past maximum Thumb-1 immediate offset + +; CHECK-LABEL: ldrsb_ri_t1_too_big +; CHECK-T1: movs r1, #32 +; CHECK-T1: ldrsb r0, [r0, r1] +; CHECK-T2: ldrsb.w r0, [r0, #32] +define i32 @ldrsb_ri_t1_too_big(i8* %p) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 32 + %0 = load i8, i8* %arrayidx, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_ri_t1_too_big +; CHECK-T1: movs r1, #64 +; CHECK-T1: ldrsh r0, [r0, r1] +; CHECK-T2: ldrsh.w r0, [r0, #64] +define i32 @ldrsh_ri_t1_too_big(i16* %p) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i32 32 + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrb_ri_t1_too_big +; CHECK-T1: movs r1, #32 +; CHECK-T1: ldrb r0, [r0, r1] +; CHECK-T2: ldrb.w r0, [r0, #32] +define i32 @ldrb_ri_t1_too_big(i8* %p) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 32 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_ri_t1_too_big +; CHECK-T1: movs r1, #64 +; CHECK-T1: ldrh r0, [r0, r1] +; CHECK-T2: ldrh.w r0, [r0, #64] +define i32 @ldrh_ri_t1_too_big(i16* %p) { entry: - %tmp = load i8, i8* %p ; [#uses=1] - %tmp1 = sext i8 %tmp to i32 ; [#uses=1] - ret i32 %tmp1 + %arrayidx = getelementptr inbounds i16, i16* %p, i32 32 + %0 = load i16, i16* %arrayidx, align 2 + %conv = zext i16 %0 to i32 + ret i32 %conv } -define i32 @f2(i8* %p) { +; CHECK-LABEL: ldr_ri_t1_too_big +; CHECK-T1: movs r1, #128 +; CHECK-T1: ldr r0, [r0, r1] +; CHECK-T2: ldr.w r0, [r0, #128] +define i32 @ldr_ri_t1_too_big(i32* %p) { entry: - %tmp = load i8, i8* %p ; [#uses=1] - %tmp2 = zext i8 %tmp to i32 ; [#uses=1] - ret i32 %tmp2 + %arrayidx = getelementptr inbounds i32, i32* %p, i32 32 + %0 = load i32, i32* %arrayidx, align 4 + ret i32 %0 } -define i32 @f3(i16* %p) { +; CHECK-LABEL: strb_ri_t1_too_big +; CHECK-T1: movs r2, #32 +; CHECK-T1: strb r1, [r0, r2] +; CHECK-T2: strb.w r1, [r0, #32] +define void @strb_ri_t1_too_big(i8* %p, i32 %x) { entry: - %tmp = load i16, i16* %p ; [#uses=1] - %tmp3 = sext i16 %tmp to i32 ; [#uses=1] - ret i32 %tmp3 + %conv = trunc i32 %x to i8 + %arrayidx = getelementptr inbounds i8, i8* %p, i32 32 + store i8 %conv, i8* %arrayidx, align 1 + ret void } -define i32 @f4(i16* %p) { +; CHECK-LABEL: strh_ri_t1_too_big +; CHECK-T1: movs r2, #64 +; CHECK-T1: strh r1, [r0, r2] +; CHECK-T2: strh.w r1, [r0, #64] +define void @strh_ri_t1_too_big(i16* %p, i32 %x) { entry: - %tmp = load i16, i16* %p ; [#uses=1] - %tmp4 = zext i16 %tmp to i32 ; [#uses=1] - ret i32 %tmp4 + %conv = trunc i32 %x to i16 + %arrayidx = getelementptr inbounds i16, i16* %p, i32 32 + store i16 %conv, i16* %arrayidx, align 2 + ret void } -; CHECK: ldrsb -; CHECK: ldrb -; CHECK: ldrsh -; CHECK: ldrh +; CHECK-LABEL: str_ri_t1_too_big +; CHECK-T1: movs r2, #128 +; CHECK-T1: str r1, [r0, r2] +; CHECK-T2: str.w r1, [r0, #128] +define void @str_ri_t1_too_big(i32* %p, i32 %x) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %p, i32 32 + store i32 %x, i32* %arrayidx, align 4 + ret void +} + + +; Maximum Thumb-2 immediate offset + +; CHECK-LABEL: ldrsb_ri_t2_max +; CHECK-T1: ldr r1, .LCP +; CHECK-T1: ldrsb r0, [r0, r1] +; CHECK-T2: ldrsb.w r0, [r0, #4095] +define i32 @ldrsb_ri_t2_max(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = load i8, i8* %add.ptr, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_ri_t2_max +; CHECK-T1: ldr r1, .LCP +; CHECK-T1: ldrsh r0, [r0, r1] +; CHECK-T2: ldrsh.w r0, [r0, #4095] +define i32 @ldrsh_ri_t2_max(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + %conv = sext i16 %1 to i32 + ret i32 %conv +} +; CHECK-LABEL: ldrb_ri_t2_max +; CHECK-T1: ldr r1, .LCP +; CHECK-T1: ldrb r0, [r0, r1] +; CHECK-T2: ldrb.w r0, [r0, #4095] +define i32 @ldrb_ri_t2_max(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = load i8, i8* %add.ptr, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_ri_t2_max +; CHECK-T1: ldr r1, .LCP +; CHECK-T1: ldrh r0, [r0, r1] +; CHECK-T2: ldrh.w r0, [r0, #4095] +define i32 @ldrh_ri_t2_max(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + %conv = zext i16 %1 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldr_ri_t2_max +; CHECK-T1: ldr r1, .LCP +; CHECK-T1: ldr r0, [r0, r1] +; CHECK-T2: ldr.w r0, [r0, #4095] +define i32 @ldr_ri_t2_max(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = bitcast i8* %add.ptr to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +; CHECK-LABEL: strb_ri_t2_max +; CHECK-T1: ldr r2, .LCP +; CHECK-T1: strb r1, [r0, r2] +; CHECK-T2: strb.w r1, [r0, #4095] +define void @strb_ri_t2_max(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i8 + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + store i8 %conv, i8* %add.ptr, align 1 + ret void +} + +; CHECK-LABEL: strh_ri_t2_max +; CHECK-T1: ldr r2, .LCP +; CHECK-T1: strh r1, [r0, r2] +; CHECK-T2: strh.w r1, [r0, #4095] +define void @strh_ri_t2_max(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i16 + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = bitcast i8* %add.ptr to i16* + store i16 %conv, i16* %0, align 2 + ret void +} + +; CHECK-LABEL: str_ri_t2_max +; CHECK-T1: ldr r2, .LCP +; CHECK-T1: str r1, [r0, r2] +; CHECK-T2: str.w r1, [r0, #4095] +define void @str_ri_t2_max(i8* %p, i32 %x) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = bitcast i8* %add.ptr to i32* + store i32 %x, i32* %0, align 4 + ret void +} + + +; One past maximum Thumb-2 immediate offset + +; CHECK-LABEL: ldrsb_ri_t2_too_big +; CHECK-T1: movs r1, #1 +; CHECK-T1: lsls r1, r1, #12 +; CHECK-T2: mov.w r1, #4096 +; CHECK: ldrsb r0, [r0, r1] +define i32 @ldrsb_ri_t2_too_big(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = load i8, i8* %add.ptr, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_ri_t2_too_big +; CHECK-T1: movs r1, #1 +; CHECK-T1: lsls r1, r1, #12 +; CHECK-T2: mov.w r1, #4096 +; CHECK: ldrsh r0, [r0, r1] +define i32 @ldrsh_ri_t2_too_big(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + %conv = sext i16 %1 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrb_ri_t2_too_big +; CHECK-T1: movs r1, #1 +; CHECK-T1: lsls r1, r1, #12 +; CHECK-T2: mov.w r1, #4096 +; CHECK: ldrb r0, [r0, r1] +define i32 @ldrb_ri_t2_too_big(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = load i8, i8* %add.ptr, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_ri_t2_too_big +; CHECK-T1: movs r1, #1 +; CHECK-T1: lsls r1, r1, #12 +; CHECK-T2: mov.w r1, #4096 +; CHECK: ldrh r0, [r0, r1] +define i32 @ldrh_ri_t2_too_big(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + %conv = zext i16 %1 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldr_ri_t2_too_big +; CHECK-T1: movs r1, #1 +; CHECK-T1: lsls r1, r1, #12 +; CHECK-T2: mov.w r1, #4096 +; CHECK: ldr r0, [r0, r1] +define i32 @ldr_ri_t2_too_big(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = bitcast i8* %add.ptr to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +; CHECK-LABEL: strb_ri_t2_too_big +; CHECK-T1: movs r2, #1 +; CHECK-T1: lsls r2, r2, #12 +; CHECK-T2: mov.w r2, #4096 +; CHECK: strb r1, [r0, r2] +define void @strb_ri_t2_too_big(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i8 + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + store i8 %conv, i8* %add.ptr, align 1 + ret void +} + +; CHECK-LABEL: strh_ri_t2_too_big +; CHECK-T1: movs r2, #1 +; CHECK-T1: lsls r2, r2, #12 +; CHECK-T2: mov.w r2, #4096 +; CHECK: strh r1, [r0, r2] +define void @strh_ri_t2_too_big(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i16 + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = bitcast i8* %add.ptr to i16* + store i16 %conv, i16* %0, align 2 + ret void +} + +; CHECK-LABEL: str_ri_t2_too_big +; CHECK-T1: movs r2, #1 +; CHECK-T1: lsls r2, r2, #12 +; CHECK-T2: mov.w r2, #4096 +; CHECK: str r1, [r0, r2] +define void @str_ri_t2_too_big(i8* %p, i32 %x) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = bitcast i8* %add.ptr to i32* + store i32 %x, i32* %0, align 4 + ret void +}