Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h
@@ -332,6 +332,9 @@
     bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
     bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
     bool isZExtFree(SDValue Val, EVT VT2) const override;
+    bool shouldSinkOperands(Instruction *I,
+                            SmallVectorImpl<Use *> &Ops) const override;
+
     bool isFNegFree(EVT VT) const override;
     bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
 
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -79,6 +79,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -112,6 +113,7 @@
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "arm-isel"
 
@@ -13146,6 +13148,46 @@
   return false;
 }
 
+/// Check if Ext1 and Ext2 are both sign- or zero-extend instructions that
+/// double the scalar bitwidth of their operand. The kind of extend is not
+/// compared, so a sext may be paired with a zext; non-instruction values
+/// (e.g. constant expressions) are rejected.
+static bool areExtractExts(Value *Ext1, Value *Ext2) {
+  auto isDoublingExt = [](Value *V) {
+    // m_ZExtOrSExt also matches constant expressions; those cannot be sunk
+    // into a basic block, so insist on a real instruction.
+    auto *Ext = dyn_cast<Instruction>(V);
+    return Ext && match(Ext, m_ZExtOrSExt(m_Value())) &&
+           Ext->getType()->getScalarSizeInBits() ==
+               2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
+  };
+
+  return isDoublingExt(Ext1) && isDoublingExt(Ext2);
+}
+
+/// Check if sinking \p I's operands to I's basic block is profitable, because
+/// the operands can be folded into a target instruction, e.g.
+/// sext/zext can be folded into vsubl.
+bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
+                                           SmallVectorImpl<Use *> &Ops) const {
+  // Sinking is only attempted for vector operations on NEON subtargets.
+  if (!Subtarget->hasNEON() || !I->getType()->isVectorTy())
+    return false;
+
+  switch (I->getOpcode()) {
+  case Instruction::Sub:
+  case Instruction::Add:
+    if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
+      return false;
+    // Report both operand uses so the two extends are sunk into I's block.
+    Ops.push_back(&I->getOperandUse(0));
+    Ops.push_back(&I->getOperandUse(1));
+    return true;
+  default:
+    return false;
+  }
+}
+
 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
   EVT VT = ExtVal.getValueType();
 
Index: llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll
+++ llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll
@@ -0,0 +1,220 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=armv7-apple-darwin < %s -codegenprepare -S | FileCheck -check-prefix=NEON %s
+; RUN: opt -mtriple=armv6-unknown-linux < %s -codegenprepare -S | FileCheck -check-prefix=NONEON %s
+
+define <8 x i16> @sink_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
+; NEON-LABEL: @sink_zext(
+; NEON-NEXT:  entry:
+; NEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON:       if.then:
+; NEON-NEXT:    [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
+; NEON-NEXT:    [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; NEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NEON:       if.else:
+; NEON-NEXT:    [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+; NEON-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+; NEON-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
+; NEON-NEXT:    ret <8 x i16> [[RES_2]]
+;
+; NONEON-LABEL: @sink_zext(
+; NONEON-NEXT:  entry:
+; NONEON-NEXT:    [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON:       if.then:
+; NONEON-NEXT:    [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
+; NONEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[ZB_1]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NONEON:       if.else:
+; NONEON-NEXT:    [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+; NONEON-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[ZA]], [[ZB_2]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_2]]
+;
+entry:
+  %za = zext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %zb.1 = zext <8 x i8> %b to <8 x i16>
+  %res.1 = add <8 x i16> %za, %zb.1
+  ret <8 x i16> %res.1
+
+if.else:
+  %zb.2 = zext <8 x i8> %b to <8 x i16>
+  %res.2 = sub <8 x i16> %za, %zb.2
+  ret <8 x i16> %res.2
+}
+
+define <8 x i16> @sink_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
+; NEON-LABEL: @sink_sext(
+; NEON-NEXT:  entry:
+; NEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON:       if.then:
+; NEON-NEXT:    [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
+; NEON-NEXT:    [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; NEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NEON:       if.else:
+; NEON-NEXT:    [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+; NEON-NEXT:    [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+; NEON-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
+; NEON-NEXT:    ret <8 x i16> [[RES_2]]
+;
+; NONEON-LABEL: @sink_sext(
+; NONEON-NEXT:  entry:
+; NONEON-NEXT:    [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON:       if.then:
+; NONEON-NEXT:    [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
+; NONEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[ZB_1]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NONEON:       if.else:
+; NONEON-NEXT:    [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+; NONEON-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[ZA]], [[ZB_2]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_2]]
+;
+entry:
+  %za = sext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %zb.1 = sext <8 x i8> %b to <8 x i16>
+  %res.1 = add <8 x i16> %za, %zb.1
+  ret <8 x i16> %res.1
+
+if.else:
+  %zb.2 = sext <8 x i8> %b to <8 x i16>
+  %res.2 = sub <8 x i16> %za, %zb.2
+  ret <8 x i16> %res.2
+}
+
+define <8 x i16> @do_not_sink_nonfree_zext(<8 x i8> %a, <8 x i16> %b, i1 %c) {
+;
+; NEON-LABEL: @do_not_sink_nonfree_zext(
+; NEON-NEXT:  entry:
+; NEON-NEXT:    [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON:       if.then:
+; NEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NEON:       if.else:
+; NEON-NEXT:    ret <8 x i16> [[B]]
+;
+; NONEON-LABEL: @do_not_sink_nonfree_zext(
+; NONEON-NEXT:  entry:
+; NONEON-NEXT:    [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON:       if.then:
+; NONEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NONEON:       if.else:
+; NONEON-NEXT:    ret <8 x i16> [[B]]
+;
+entry:
+  %za = zext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %res.1 = add <8 x i16> %za, %b
+  ret <8 x i16> %res.1
+
+if.else:
+  ret <8 x i16> %b
+}
+
+define <8 x i16> @do_not_sink_nonfree_sext(<8 x i8> %a, <8 x i16> %b, i1 %c) {
+; NEON-LABEL: @do_not_sink_nonfree_sext(
+; NEON-NEXT:  entry:
+; NEON-NEXT:    [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON:       if.then:
+; NEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NEON:       if.else:
+; NEON-NEXT:    ret <8 x i16> [[B]]
+;
+; NONEON-LABEL: @do_not_sink_nonfree_sext(
+; NONEON-NEXT:  entry:
+; NONEON-NEXT:    [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON:       if.then:
+; NONEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NONEON:       if.else:
+; NONEON-NEXT:    ret <8 x i16> [[B]]
+;
+entry:
+  %za = sext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %res.1 = add <8 x i16> %za, %b
+  ret <8 x i16> %res.1
+
+if.else:
+  ret <8 x i16> %b
+}
+
+declare void @user1(<8 x i16>)
+
+; Exts can be sunk.
+define <8 x i16> @sink_shufflevector_ext_subadd_multiuse(<16 x i8> %a, <16 x i8> %b) {
+; NEON-LABEL: @sink_shufflevector_ext_subadd_multiuse(
+; NEON-NEXT:  entry:
+; NEON-NEXT:    [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:    [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:    [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
+; NEON-NEXT:    call void @user1(<8 x i16> [[Z3]])
+; NEON-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON:       if.then:
+; NEON-NEXT:    [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:    [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16>
+; NEON-NEXT:    [[TMP0:%.*]] = zext <8 x i8> [[S1]] to <8 x i16>
+; NEON-NEXT:    [[RES1:%.*]] = add <8 x i16> [[TMP0]], [[Z2]]
+; NEON-NEXT:    ret <8 x i16> [[RES1]]
+; NEON:       if.else:
+; NEON-NEXT:    [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:    [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16>
+; NEON-NEXT:    [[TMP1:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
+; NEON-NEXT:    [[RES2:%.*]] = sub <8 x i16> [[TMP1]], [[Z4]]
+; NEON-NEXT:    ret <8 x i16> [[RES2]]
+;
+; NONEON-LABEL: @sink_shufflevector_ext_subadd_multiuse(
+; NONEON-NEXT:  entry:
+; NONEON-NEXT:    [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NONEON-NEXT:    [[Z1:%.*]] = zext <8 x i8> [[S1]] to <8 x i16>
+; NONEON-NEXT:    [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NONEON-NEXT:    [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
+; NONEON-NEXT:    call void @user1(<8 x i16> [[Z3]])
+; NONEON-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON:       if.then:
+; NONEON-NEXT:    [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NONEON-NEXT:    [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16>
+; NONEON-NEXT:    [[RES1:%.*]] = add <8 x i16> [[Z1]], [[Z2]]
+; NONEON-NEXT:    ret <8 x i16> [[RES1]]
+; NONEON:       if.else:
+; NONEON-NEXT:    [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NONEON-NEXT:    [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16>
+; NONEON-NEXT:    [[RES2:%.*]] = sub <8 x i16> [[Z3]], [[Z4]]
+; NONEON-NEXT:    ret <8 x i16> [[RES2]]
+;
+entry:
+  %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %z1 = zext <8 x i8> %s1 to <8 x i16>
+  %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %z3 = sext <8 x i8> %s3 to <8 x i16>
+  call void @user1(<8 x i16> %z3)
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %z2 = zext <8 x i8> %s2 to <8 x i16>
+  %res1 = add <8 x i16> %z1, %z2
+  ret <8 x i16> %res1
+
+if.else:
+  %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %z4 = sext <8 x i8> %s4 to <8 x i16>
+  %res2 = sub <8 x i16> %z3, %z4
+  ret <8 x i16> %res2
+}