Skip to content

Commit 5b74cc3

Browse files
committed
Merge from 'main' to 'sycl-web' (#8)
CONFLICT (content): Merge conflict in clang/lib/Frontend/CompilerInvocation.cpp
2 parents 94660ed + a7dcd3a commit 5b74cc3

37 files changed

+323
-251
lines changed

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2205,12 +2205,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
22052205
}
22062206
}
22072207

2208-
if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) {
2209-
StringRef Name = A->getValue();
2210-
if (Name == "full" || Name == "branch") {
2211-
Opts.CFProtectionBranch = 1;
2212-
}
2213-
}
22142208
// -cl-std only applies for OpenCL language standards.
22152209
// Override the -std option in this case.
22162210
if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) {
@@ -2233,7 +2227,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
22332227
LangStd = OpenCLLangStd;
22342228
}
22352229

2236-
Opts.SYCLIsDevice = Opts.SYCL && Args.hasArg(options::OPT_fsycl_is_device);
22372230
if (Opts.SYCL) {
22382231
Opts.SYCLIsDevice = Args.hasArg(options::OPT_fsycl_is_device);
22392232
Opts.SYCLIsHost = Args.hasArg(options::OPT_fsycl_is_host);
@@ -2276,6 +2269,15 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
22762269

22772270
CompilerInvocation::setLangDefaults(Opts, IK, T, Includes, LangStd);
22782271

2272+
if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) {
2273+
StringRef Name = A->getValue();
2274+
if (Name == "full" || Name == "branch") {
2275+
Opts.CFProtectionBranch = 1;
2276+
}
2277+
}
2278+
2279+
Opts.SYCLIsDevice = Opts.SYCL && Args.hasArg(options::OPT_fsycl_is_device);
2280+
22792281
// -cl-strict-aliasing needs to emit diagnostic in the case where CL > 1.0.
22802282
// This option should be deprecated for CL > 1.0 because
22812283
// this option was added for compatibility with OpenCL 1.0.
@@ -3014,6 +3016,7 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
30143016
// PIClevel and PIELevel are needed during code generation and this should be
30153017
// set regardless of the input type.
30163018
LangOpts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags);
3019+
LangOpts.PIE = Args.hasArg(OPT_pic_is_pie);
30173020
parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ),
30183021
Diags, LangOpts.Sanitize);
30193022
} else {

libc/src/string/memory_utils/memcpy_utils.h

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -72,28 +72,35 @@ static void CopyBlockOverlap(char *__restrict dst, const char *__restrict src,
7272

7373
// Copies `count` bytes by blocks of `kBlockSize` bytes.
7474
// Copies at the start and end of the buffer are unaligned.
75-
// Copies in the middle of the buffer are aligned to `kBlockSize`.
75+
// Copies in the middle of the buffer are aligned to `kAlignment`.
7676
//
7777
// e.g. with
7878
// [12345678123456781234567812345678]
79-
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
80-
// [__XXXXXXXX______________________]
81-
// [________XXXXXXXX________________]
82-
// [________________XXXXXXXX________]
83-
// [_____________________XXXXXXXX___]
79+
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
80+
// [__XXXX___________________________]
81+
// [_____XXXXXXXX____________________]
82+
// [_____________XXXXXXXX____________]
83+
// [_____________________XXXXXXXX____]
84+
// [______________________XXXXXXXX___]
8485
//
85-
// Precondition: `count > 2 * kBlockSize` for efficiency.
86-
// `count >= kBlockSize` for correctness.
87-
template <size_t kBlockSize>
86+
// Precondition: `kAlignment <= kBlockSize`
87+
// `count > 2 * kBlockSize` for efficiency.
88+
// `count >= kBlockSize` for correctness.
89+
template <size_t kBlockSize, size_t kAlignment = kBlockSize>
8890
static void CopyAlignedBlocks(char *__restrict dst, const char *__restrict src,
8991
size_t count) {
90-
CopyBlock<kBlockSize>(dst, src); // Copy first block
92+
static_assert(is_power2(kAlignment), "kAlignment must be a power of two");
93+
static_assert(is_power2(kBlockSize), "kBlockSize must be a power of two");
94+
static_assert(kAlignment <= kBlockSize,
95+
"kAlignment must be less or equal to block size");
96+
CopyBlock<kAlignment>(dst, src); // Copy first block
9197

9298
// Copy aligned blocks
93-
const size_t ofla = offset_from_last_aligned<kBlockSize>(src);
99+
const size_t ofla = offset_from_last_aligned<kAlignment>(src);
94100
const size_t limit = count + ofla - kBlockSize;
95-
for (size_t offset = kBlockSize; offset < limit; offset += kBlockSize)
96-
CopyBlock<kBlockSize>(dst - ofla + offset, src - ofla + offset);
101+
for (size_t offset = kAlignment; offset < limit; offset += kBlockSize)
102+
CopyBlock<kBlockSize>(dst - ofla + offset,
103+
assume_aligned<kAlignment>(src - ofla + offset));
97104

98105
CopyLastBlock<kBlockSize>(dst, src, count); // Copy last block
99106
}

libc/src/string/memory_utils/utils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ static inline intptr_t offset_to_next_cache_line(const void *ptr) {
6060
return offset_to_next_aligned<LLVM_LIBC_CACHELINE_SIZE>(ptr);
6161
}
6262

63+
template <size_t alignment, typename T> static T *assume_aligned(T *ptr) {
64+
return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
65+
}
66+
6367
} // namespace __llvm_libc
6468

6569
#endif // LLVM_LIBC_SRC_MEMORY_UTILS_H

libc/test/src/string/memory_utils/memcpy_utils_test.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,24 @@ TEST(MemcpyUtilsTest, CopyAlignedBlocks) {
211211
EXPECT_STREQ(trace.Read(), "011121111111");
212212
}
213213

214-
TEST(MemcpyUtilsTest, MaxReloads) {
214+
TEST(MemcpyUtilsTest, CopyAlignedBlocksWithAlignment) {
215+
auto &trace = GetTrace();
216+
// Source is aligned and multiple of alignment.
217+
// "11111111"
218+
trace.Clear();
219+
CopyAlignedBlocks<8, 4>(I(0), I(0), 8);
220+
EXPECT_STREQ(trace.Write(), "22221111");
221+
EXPECT_STREQ(trace.Read(), "22221111");
222+
223+
// Source is aligned and count is not a multiple of alignment.
224+
// "111111111"
225+
trace.Clear();
226+
CopyAlignedBlocks<8, 4>(I(0), I(0), 9);
227+
EXPECT_STREQ(trace.Write(), "122211111");
228+
EXPECT_STREQ(trace.Read(), "122211111");
229+
}
230+
231+
TEST(MemcpyUtilsTest, CopyAlignedBlocksMaxReloads) {
215232
auto &trace = GetTrace();
216233
for (size_t alignment = 0; alignment < 32; ++alignment) {
217234
for (size_t count = 64; count < 768; ++count) {
@@ -231,4 +248,24 @@ TEST(MemcpyUtilsTest, MaxReloads) {
231248
}
232249
}
233250

251+
TEST(MemcpyUtilsTest, CopyAlignedBlocksWithAlignmentMaxReloads) {
252+
auto &trace = GetTrace();
253+
for (size_t alignment = 0; alignment < 32; ++alignment) {
254+
for (size_t count = 64; count < 768; ++count) {
255+
trace.Clear();
256+
// We should never reload more than twice when copying from count = 2x32.
257+
CopyAlignedBlocks<32, 16>(I(alignment), I(0), count);
258+
const char *const written = trace.Write();
259+
// First bytes are untouched.
260+
for (size_t i = 0; i < alignment; ++i)
261+
EXPECT_EQ(written[i], '0');
262+
// Next bytes are loaded once or twice but no more.
263+
for (size_t i = alignment; i < count; ++i) {
264+
EXPECT_GE(written[i], '1');
265+
EXPECT_LE(written[i], '2');
266+
}
267+
}
268+
}
269+
}
270+
234271
} // namespace __llvm_libc

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20887,6 +20887,32 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
2088720887
continue;
2088820888
}
2088920889

20890+
// Last chance - see if the vector is another shuffle and if it
20891+
// uses one of the existing candidate shuffle ops.
20892+
if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
20893+
int InnerIdx = CurrentSVN->getMaskElt(Idx);
20894+
if (InnerIdx < 0) {
20895+
Mask.push_back(-1);
20896+
continue;
20897+
}
20898+
SDValue InnerVec = (InnerIdx < (int)NumElts)
20899+
? CurrentSVN->getOperand(0)
20900+
: CurrentSVN->getOperand(1);
20901+
if (InnerVec.isUndef()) {
20902+
Mask.push_back(-1);
20903+
continue;
20904+
}
20905+
InnerIdx %= NumElts;
20906+
if (InnerVec == SV0) {
20907+
Mask.push_back(InnerIdx);
20908+
continue;
20909+
}
20910+
if (InnerVec == SV1) {
20911+
Mask.push_back(InnerIdx + NumElts);
20912+
continue;
20913+
}
20914+
}
20915+
2089020916
// Bail out if we cannot convert the shuffle pair into a single shuffle.
2089120917
return false;
2089220918
}

llvm/lib/Transforms/Scalar/LoopSink.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
7070
cl::desc("Do not sink instructions that have too many uses."));
7171

7272
static cl::opt<bool> EnableMSSAInLoopSink(
73-
"enable-mssa-in-loop-sink", cl::Hidden, cl::init(false),
73+
"enable-mssa-in-loop-sink", cl::Hidden, cl::init(true),
7474
cl::desc("Enable MemorySSA for LoopSink in new pass manager"));
7575

7676
static cl::opt<bool> EnableMSSAInLegacyLoopSink(

0 commit comments

Comments
 (0)