Line 0
Link Here
|
|
|
1 |
From f8e146f3430de3a6cd904f3f3f7aa1bfaefee14c Mon Sep 17 00:00:00 2001 |
2 |
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com> |
3 |
Date: Thu, 28 Nov 2019 23:18:28 +0100 |
4 |
Subject: [PATCH] [InstCombine] Fix big-endian miscompile of (bitcast |
5 |
(zext/trunc (bitcast))) |
6 |
|
7 |
Summary: |
8 |
optimizeVectorResize is rewriting patterns like: |
9 |
%1 = bitcast vector %src to integer |
10 |
%2 = trunc/zext %1 |
11 |
%dst = bitcast %2 to vector |
12 |
|
13 |
Since bitcasting between integer an vector types gives |
14 |
different integer values depending on endianness, we need |
15 |
to take endianness into account. As it happens the old |
16 |
implementation only produced the correct result for little |
17 |
endian targets. |
18 |
|
19 |
Fixes: https://bugs.llvm.org/show_bug.cgi?id=44178 |
20 |
|
21 |
Reviewers: spatel, lattner, lebedev.ri |
22 |
|
23 |
Reviewed By: spatel, lebedev.ri |
24 |
|
25 |
Subscribers: lebedev.ri, hiraditya, uabelho, llvm-commits |
26 |
|
27 |
Tags: #llvm |
28 |
|
29 |
Differential Revision: https://reviews.llvm.org/D70844 |
30 |
|
31 |
(cherry picked from commit a9d6b0e5444741d08ff1df7cf71d1559e7fefc1f) |
32 |
--- |
33 |
.../InstCombine/InstCombineCasts.cpp | 79 +++++++++++++------ |
34 |
llvm/test/Transforms/InstCombine/cast.ll | 6 +- |
35 |
2 files changed, 60 insertions(+), 25 deletions(-) |
36 |
|
37 |
--- src/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp.orig 2020-04-07 15:52:51 UTC |
38 |
+++ src/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp |
39 |
@@ -18,6 +18,7 @@ |
40 |
#include "llvm/IR/DIBuilder.h" |
41 |
#include "llvm/IR/PatternMatch.h" |
42 |
#include "llvm/Support/KnownBits.h" |
43 |
+#include <numeric> |
44 |
using namespace llvm; |
45 |
using namespace PatternMatch; |
46 |
|
47 |
@@ -1820,12 +1821,24 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst |
48 |
} |
49 |
|
50 |
/// This input value (which is known to have vector type) is being zero extended |
51 |
-/// or truncated to the specified vector type. |
52 |
+/// or truncated to the specified vector type. Since the zext/trunc is done |
53 |
+/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, |
54 |
+/// endianness will impact which end of the vector that is extended or |
55 |
+/// truncated. |
56 |
+/// |
57 |
+/// A vector is always stored with index 0 at the lowest address, which |
58 |
+/// corresponds to the most significant bits for a big endian stored integer and |
59 |
+/// the least significant bits for little endian. A trunc/zext of an integer |
60 |
+/// impacts the big end of the integer. Thus, we need to add/remove elements at |
61 |
+/// the front of the vector for big endian targets, and the back of the vector |
62 |
+/// for little endian targets. |
63 |
+/// |
64 |
/// Try to replace it with a shuffle (and vector/vector bitcast) if possible. |
65 |
/// |
66 |
/// The source and destination vector types may have different element types. |
67 |
-static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, |
68 |
- InstCombiner &IC) { |
69 |
+static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal, |
70 |
+ VectorType *DestTy, |
71 |
+ InstCombiner &IC) { |
72 |
// We can only do this optimization if the output is a multiple of the input |
73 |
// element size, or the input is a multiple of the output element size. |
74 |
// Convert the input type to have the same element type as the output. |
75 |
@@ -1844,31 +1857,53 @@ static Instruction *optimizeVectorResize(Value *InVal, |
76 |
InVal = IC.Builder.CreateBitCast(InVal, SrcTy); |
77 |
} |
78 |
|
79 |
+ bool IsBigEndian = IC.getDataLayout().isBigEndian(); |
80 |
+ unsigned SrcElts = SrcTy->getNumElements(); |
81 |
+ unsigned DestElts = DestTy->getNumElements(); |
82 |
+ |
83 |
+ assert(SrcElts != DestElts && "Element counts should be different."); |
84 |
+ |
85 |
// Now that the element types match, get the shuffle mask and RHS of the |
86 |
// shuffle to use, which depends on whether we're increasing or decreasing the |
87 |
// size of the input. |
88 |
- SmallVector<uint32_t, 16> ShuffleMask; |
89 |
+ SmallVector<uint32_t, 16> ShuffleMaskStorage; |
90 |
+ ArrayRef<uint32_t> ShuffleMask; |
91 |
Value *V2; |
92 |
|
93 |
- if (SrcTy->getNumElements() > DestTy->getNumElements()) { |
94 |
- // If we're shrinking the number of elements, just shuffle in the low |
95 |
- // elements from the input and use undef as the second shuffle input. |
96 |
- V2 = UndefValue::get(SrcTy); |
97 |
- for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) |
98 |
- ShuffleMask.push_back(i); |
99 |
+ // Produce an identify shuffle mask for the src vector. |
100 |
+ ShuffleMaskStorage.resize(SrcElts); |
101 |
+ std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0); |
102 |
|
103 |
+ if (SrcElts > DestElts) { |
104 |
+ // If we're shrinking the number of elements (rewriting an integer |
105 |
+ // truncate), just shuffle in the elements corresponding to the least |
106 |
+ // significant bits from the input and use undef as the second shuffle |
107 |
+ // input. |
108 |
+ V2 = UndefValue::get(SrcTy); |
109 |
+ // Make sure the shuffle mask selects the "least significant bits" by |
110 |
+ // keeping elements from back of the src vector for big endian, and from the |
111 |
+ // front for little endian. |
112 |
+ ShuffleMask = ShuffleMaskStorage; |
113 |
+ if (IsBigEndian) |
114 |
+ ShuffleMask = ShuffleMask.take_back(DestElts); |
115 |
+ else |
116 |
+ ShuffleMask = ShuffleMask.take_front(DestElts); |
117 |
} else { |
118 |
- // If we're increasing the number of elements, shuffle in all of the |
119 |
- // elements from InVal and fill the rest of the result elements with zeros |
120 |
- // from a constant zero. |
121 |
+ // If we're increasing the number of elements (rewriting an integer zext), |
122 |
+ // shuffle in all of the elements from InVal. Fill the rest of the result |
123 |
+ // elements with zeros from a constant zero. |
124 |
V2 = Constant::getNullValue(SrcTy); |
125 |
- unsigned SrcElts = SrcTy->getNumElements(); |
126 |
- for (unsigned i = 0, e = SrcElts; i != e; ++i) |
127 |
- ShuffleMask.push_back(i); |
128 |
- |
129 |
- // The excess elements reference the first element of the zero input. |
130 |
- for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) |
131 |
- ShuffleMask.push_back(SrcElts); |
132 |
+ // Use first elt from V2 when indicating zero in the shuffle mask. |
133 |
+ uint32_t NullElt = SrcElts; |
134 |
+ // Extend with null values in the "most significant bits" by adding elements |
135 |
+ // in front of the src vector for big endian, and at the back for little |
136 |
+ // endian. |
137 |
+ unsigned DeltaElts = DestElts - SrcElts; |
138 |
+ if (IsBigEndian) |
139 |
+ ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt); |
140 |
+ else |
141 |
+ ShuffleMaskStorage.append(DeltaElts, NullElt); |
142 |
+ ShuffleMask = ShuffleMaskStorage; |
143 |
} |
144 |
|
145 |
return new ShuffleVectorInst(InVal, V2, |
146 |
@@ -2396,8 +2431,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &C |
147 |
CastInst *SrcCast = cast<CastInst>(Src); |
148 |
if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) |
149 |
if (isa<VectorType>(BCIn->getOperand(0)->getType())) |
150 |
- if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0), |
151 |
- cast<VectorType>(DestTy), *this)) |
152 |
+ if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts( |
153 |
+ BCIn->getOperand(0), cast<VectorType>(DestTy), *this)) |
154 |
return I; |
155 |
} |
156 |
|