-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)) when BitWidth is pow2 #122362
base: main
Are you sure you want to change the base?
Conversation
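@llvm/pr-subscribers-llvm-transforms Author: Amr Hesham (AmrDeveloper) Changes: Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)), or to shl(x, y) if y is within the range 0 to BitWidth - 1. Fixes: #122235 Full diff: https://github.com/llvm/llvm-project/pull/122362.diff 2 Files Affected: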
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c55c40c88bc845..f0ff76ba57555b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2229,6 +2229,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return BitOp;
}
+ // fshal(X, 0, Y) --> shl(X, and(Y, BitWidth - 1))
+ // fshal(X, 0, Y) --> Shl(X, Y) if Y within the range 0 to type bit width
+ if (match(Op1, m_ZeroInt())) {
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ Value *Op2 = II->getArgOperand(2);
+ if (auto Range = II->getRange(); Range && Range->getLower().sge(0) &&
+ Range->getUpper().sle(BitWidth)) {
+ return BinaryOperator::CreateShl(Op0, Op2);
+ }
+ Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
+ return BinaryOperator::CreateShl(Op0, And);
+ }
+
// Left or right might be masked.
if (SimplifyDemandedInstructionBits(*II))
return &CI;
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 434cd810296d8c..c0f1ee4a5976bb 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -6,6 +6,7 @@ declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i33 @llvm.fshr.i33(i33, i33, i33)
declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
+declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>)
declare <2 x i31> @llvm.fshl.v2i31(<2 x i31>, <2 x i31>, <2 x i31>)
declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
@@ -1010,3 +1011,97 @@ define <2 x i32> @fshr_vec_zero_elem(<2 x i32> %x, <2 x i32> %y) {
%fsh = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 2, i32 0>)
ret <2 x i32> %fsh
}
+
+define i16 @fshl_i16_shl(i16 %x, i16 %y) {
+; CHECK-LABEL: @fshl_i16_shl(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i16 [[Y:%.*]], 15
+; CHECK-NEXT: [[RES:%.*]] = shl i16 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i16 [[RES]]
+;
+entry:
+ %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+ ret i16 %res
+}
+
+define i32 @fshl_i32_shl(i32 %x, i32 %y) {
+; CHECK-LABEL: @fshl_i32_shl(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[Y:%.*]], 31
+; CHECK-NEXT: [[RES:%.*]] = shl i32 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+ ret i32 %res
+}
+
+define <2 x i16> @fshl_vi16_shl(<2 x i16> %x, <2 x i16> %y) {
+; CHECK-LABEL: @fshl_vi16_shl(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i16> [[Y:%.*]], splat (i16 15)
+; CHECK-NEXT: [[RES:%.*]] = shl <2 x i16> [[X:%.*]], [[TMP0]]
+; CHECK-NEXT: ret <2 x i16> [[RES]]
+;
+entry:
+ %res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> <i16 0, i16 0>, <2 x i16> %y)
+ ret <2 x i16> %res
+}
+
+define <2 x i31> @fshl_vi31_shl(<2 x i31> %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_vi31_shl(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i31> [[Y:%.*]], splat (i31 30)
+; CHECK-NEXT: [[RES:%.*]] = shl <2 x i31> [[X:%.*]], [[TMP0]]
+; CHECK-NEXT: ret <2 x i31> [[RES]]
+;
+entry:
+ %res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> <i31 0, i31 0>, <2 x i31> %y)
+ ret <2 x i31> %res
+}
+
+define i16 @fshl_i16_shl_with_range(i16 %x, i16 range(i16 0, 16) %y) {
+; CHECK-LABEL: @fshl_i16_shl_with_range(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RES:%.*]] = shl i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i16 [[RES]]
+;
+entry:
+ %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+ ret i16 %res
+}
+
+define i32 @fshl_i32_shl_with_range(i32 %x, i32 range(i32 0, 32) %y) {
+; CHECK-LABEL: @fshl_i32_shl_with_range(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RES:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+ ret i32 %res
+}
+
+define i16 @fshl_i16_shl_with_range_ignored(i16 %x, i16 range(i16 0, 17) %y) {
+; CHECK-LABEL: @fshl_i16_shl_with_range_ignored(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i16 [[Y:%.*]], 15
+; CHECK-NEXT: [[RES:%.*]] = shl i16 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i16 [[RES]]
+;
+entry:
+ %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+ ret i16 %res
+}
+
+define i32 @fshl_i32_shl_with_range_ignored(i32 %x, i32 range(i32 0, 33) %y) {
+; CHECK-LABEL: @fshl_i32_shl_with_range_ignored(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[Y:%.*]], 31
+; CHECK-NEXT: [[RES:%.*]] = shl i32 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+ ret i32 %res
+}
|
if (auto Range = II->getRange(); Range && Range->getLower().sge(0) && | ||
Range->getUpper().sle(BitWidth)) { | ||
return BinaryOperator::CreateShl(Op0, Op2); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You don't need this code, the and below will be optimized away if it's unnecessary.
Range->getUpper().sle(BitWidth)) { | ||
return BinaryOperator::CreateShl(Op0, Op2); | ||
} | ||
Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This transform needs to be limited to power of two bitwidths, otherwise this is incorrect.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could do (urem, Op2, BitWidth). That's probably not worth it then.
@@ -2229,6 +2229,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { | |||
return BitOp; | |||
} | |||
|
|||
// fshal(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// fshal(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) | |
// fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) |
Convert
fshl(x, 0, y)
to shl(x, and(y, BitWidth - 1))
or to shl(x, y)
if y is within the range 0 to BitWidth - 1. Fixes: #122235