From 6d0be74af5555f7bc56ac72cbd98ff270fd1291b Mon Sep 17 00:00:00 2001
From: David Blaikie
Date: Sun, 4 Oct 2020 19:06:28 -0700
Subject: [PATCH 001/321] llvm-dwarfdump: Don't try to parse rnglist tables
 when dumping CUs

It's not possible to do this in complete generality - a CU using a
sec_offset DW_AT_ranges has no way of knowing where its rnglists
contribution starts, so it should not attempt to parse any full rnglist
table/header to find it. And even with DW_FORM_rnglistx there is no need
to parse the header - the offset can be computed using the CU's DWARF
format (32 or 64 bit) to derive the offset entry size, and the list can
then be parsed at that offset without ever trying to find a rnglist
contribution header immediately prior to the rnglists_base. (An
illustrative sketch of this offset computation follows the first test
diff below.)
---
 .../llvm/DebugInfo/DWARF/DWARFListTable.h     | 15 ++--
 llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h |  1 -
 llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp        | 72 +++++--------------
 .../X86/dwarfdump-rnglists-dwarf64.s          |  7 +-
 llvm/test/DebugInfo/X86/dwarfdump-rnglists.s  |  6 +-
 .../test/tools/llvm-dwarfdump/X86/tombstone.s | 19 ++---
 6 files changed, 42 insertions(+), 78 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
index ee9f7adb96c48..8f58b4e6458e1 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
@@ -116,9 +116,15 @@ class DWARFListTableHeader {
     if (Index > HeaderData.OffsetEntryCount)
       return None;
 
+    return getOffsetEntry(Data, getHeaderOffset() + getHeaderSize(Format),
+                          Format, Index);
+  }
+
+  static Optional<uint64_t> getOffsetEntry(DataExtractor Data,
+                                           uint64_t OffsetTableOffset,
+                                           dwarf::DwarfFormat Format,
+                                           uint32_t Index) {
     uint8_t OffsetByteSize = Format == dwarf::DWARF64 ? 8 : 4;
-    uint64_t Offset =
-        getHeaderOffset() + getHeaderSize(Format) + OffsetByteSize * Index;
+    uint64_t Offset = OffsetTableOffset + OffsetByteSize * Index;
     auto R = Data.getUnsigned(&Offset, OffsetByteSize);
     return R;
   }
@@ -272,9 +278,10 @@
 DWARFListTableBase<DWARFListType>::findList(DWARFDataExtractor Data,
                                             uint64_t Offset) {
   // Extract the list from the section and enter it into the list map.
   DWARFListType List;
-  Data = DWARFDataExtractor(Data, getHeaderOffset() + Header.length());
+  if (Header.length())
+    Data = DWARFDataExtractor(Data, getHeaderOffset() + Header.length());
   if (Error E =
-      List.extract(Data, getHeaderOffset(), &Offset,
+      List.extract(Data, Header.length() ? getHeaderOffset() : 0, &Offset,
                    Header.getSectionName(), Header.getListTypeString()))
     return std::move(E);
   return List;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index d0664c271fb57..0b0238f7235e2 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -229,7 +229,6 @@ class DWARFUnit {
   Optional<StrOffsetsContributionDescriptor> StringOffsetsTableContribution;
 
   /// A table of range lists (DWARF v5 and later).
-  Optional<DWARFDebugRnglistTable> RngListTable;
   Optional<DWARFListTableHeader> LoclistTableHeader;
 
   mutable const DWARFAbbreviationDeclarationSet *Abbrevs;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index b871e6ebdca56..67066db15b4ab 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -503,27 +503,9 @@ Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
                        DWARFListTableHeader::getHeaderSize(Header.getFormat()));
     } else
       setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
-                       toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0));
-    if (RangeSection->Data.size()) {
-      // Parse the range list table header. Individual range lists are
-      // extracted lazily.
-      DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
-                                  isLittleEndian, 0);
-      auto TableOrError = parseListTableHeader<DWARFDebugRnglistTable>(
-          RangesDA, RangeSectionBase, Header.getFormat());
-      if (!TableOrError)
-        return createStringError(errc::invalid_argument,
-                                 "parsing a range list table: " +
-                                     toString(TableOrError.takeError()));
-
-      RngListTable = TableOrError.get();
-
-      // In a split dwarf unit, there is no DW_AT_rnglists_base attribute.
-      // Adjust RangeSectionBase to point past the table header.
-      if (IsDWO && RngListTable)
-        RangeSectionBase =
-            ContributionBaseOffset + RngListTable->getHeaderSize();
-    }
+                       toSectionOffset(UnitDie.find(DW_AT_rnglists_base),
+                                       DWARFListTableHeader::getHeaderSize(
+                                           Header.getFormat())));
 
     // In a split dwarf unit, there is no DW_AT_loclists_base attribute.
     // Setting LocSectionBase to point past the table header.
@@ -602,19 +584,8 @@ bool DWARFUnit::parseDWO() {
   if (AddrOffsetSectionBase)
     DWO->setAddrOffsetSection(AddrOffsetSection, *AddrOffsetSectionBase);
   if (getVersion() >= 5) {
-    DWO->setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0);
-    DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
-                                isLittleEndian, 0);
-    if (auto TableOrError = parseListTableHeader<DWARFDebugRnglistTable>(
-            RangesDA, RangeSectionBase, Header.getFormat()))
-      DWO->RngListTable = TableOrError.get();
-    else
-      Context.getRecoverableErrorHandler()(createStringError(
-          errc::invalid_argument, "parsing a range list table: %s",
-          toString(TableOrError.takeError()).c_str()));
-
-    if (DWO->RngListTable)
-      DWO->RangeSectionBase = DWO->RngListTable->getHeaderSize();
+    DWO->setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(),
+                          DWARFListTableHeader::getHeaderSize(getFormat()));
   } else {
     auto DWORangesBase = UnitDie.getRangesBaseAttribute();
    DWO->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0);
  }
 
@@ -638,17 +609,13 @@ DWARFUnit::findRnglistFromOffset(uint64_t Offset) {
       return std::move(E);
     return RangeList.getAbsoluteRanges(getBaseAddress());
   }
-  if (RngListTable) {
-    DWARFDataExtractor RangesData(Context.getDWARFObj(), *RangeSection,
-                                  isLittleEndian, RngListTable->getAddrSize());
-    auto RangeListOrError = RngListTable->findList(RangesData, Offset);
-    if (RangeListOrError)
-      return RangeListOrError.get().getAbsoluteRanges(getBaseAddress(), *this);
-    return RangeListOrError.takeError();
-  }
-
-  return createStringError(errc::invalid_argument,
-                           "missing or invalid range list table");
+  DWARFDataExtractor RangesData(Context.getDWARFObj(), *RangeSection,
+                                isLittleEndian, Header.getAddressByteSize());
+  DWARFDebugRnglistTable RnglistTable;
+  auto RangeListOrError = RnglistTable.findList(RangesData, Offset);
+  if (RangeListOrError)
+    return RangeListOrError.get().getAbsoluteRanges(getBaseAddress(), *this);
+  return RangeListOrError.takeError();
 }
 
 Expected<DWARFAddressRangesVector>
 DWARFUnit::findRnglistFromIndex(uint32_t Index) {
   if (auto Offset = getRnglistOffset(Index))
     return findRnglistFromOffset(*Offset);
 
-  if (RngListTable)
-    return createStringError(errc::invalid_argument,
-                             "invalid range list table index %d", Index);
-
   return createStringError(errc::invalid_argument,
-                           "missing or invalid range list table");
+                           "invalid range list table index %d (possibly "
+                           "missing the entire range list table)",
+                           Index);
 }
 
 Expected<DWARFAddressRangesVector> DWARFUnit::collectAddressRanges() {
@@ -1007,11 +972,12 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
 }
 
 Optional<uint64_t> DWARFUnit::getRnglistOffset(uint32_t Index) {
-  if (!RngListTable)
-    return None;
   DataExtractor RangesData(RangeSection->Data, isLittleEndian,
                            getAddressByteSize());
-  if (Optional<uint64_t> Off = RngListTable->getOffsetEntry(RangesData, Index))
+  DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
+                              isLittleEndian, 0);
+  if (Optional<uint64_t> Off = llvm::DWARFListTableHeader::getOffsetEntry(
+          RangesData, RangeSectionBase, getFormat(), Index))
     return *Off + RangeSectionBase;
   return None;
 }
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-rnglists-dwarf64.s b/llvm/test/DebugInfo/X86/dwarfdump-rnglists-dwarf64.s
index f6c8751083f3a..5c8eaffe001cf 100644
--- a/llvm/test/DebugInfo/X86/dwarfdump-rnglists-dwarf64.s
+++ b/llvm/test/DebugInfo/X86/dwarfdump-rnglists-dwarf64.s
@@ -1,8 +1,8 @@
 # RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o
 # RUN: not llvm-dwarfdump -v -debug-info %t.o 2> %t.err | FileCheck %s
-# RUN: FileCheck %s --input-file %t.err --check-prefix=ERR
+# RUN: FileCheck %s --input-file %t.err --check-prefix=ERR --implicit-check-not=error
 # RUN: not llvm-dwarfdump -lookup 10 %t.o 2> %t2.err
-# RUN: FileCheck %s --input-file %t2.err --check-prefix=ERR
+# RUN: FileCheck %s --input-file %t2.err --check-prefix=ERR --implicit-check-not=error
 # RUN: llvm-dwarfdump -debug-rnglists %t.o | \
 # RUN:   FileCheck %s --check-prefix=RNGLISTS
 
@@ -209,8 +209,7 @@ Range1_end:
 # CHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000025
 # CHECK-NEXT: [0x0000002a, 0x00000034))
 
-#ERR: error: parsing a range list table: did not detect a valid list table with base = 0x8
-#ERR: error: decoding address ranges: missing or invalid range list table
+#ERR: error: decoding address ranges: invalid range list offset 0x4000500000008
 #ERR: error: decoding address ranges: invalid range list offset 0xfa0
 
 # RNGLISTS: .debug_rnglists contents:
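As the commit message above notes, a DW_FORM_rnglistx index can be resolved
from the unit's rnglists base and its DWARF format alone; no range list table
header needs to be parsed. The sketch below is illustrative only, not code
from this patch: the helper name and the lone format flag are inventions for
the example. It restates the logic of the new getRnglistOffset/getOffsetEntry
path, where stored offset entries are relative to the start of the offset
array (the rnglists_base).

```
#include "llvm/Support/DataExtractor.h"
using namespace llvm;

// Illustrative sketch: resolve a DW_FORM_rnglistx index to a section offset.
static uint64_t resolveRnglistx(const DataExtractor &Data,
                                uint64_t RnglistsBase, uint32_t Index,
                                bool IsDWARF64) {
  uint32_t EntrySize = IsDWARF64 ? 8 : 4;          // from the DWARF format
  uint64_t Off = RnglistsBase + Index * EntrySize; // slot in the offset array
  // The stored entry is relative to the start of the offset array.
  return RnglistsBase + Data.getUnsigned(&Off, EntrySize);
}
```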
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-rnglists.s b/llvm/test/DebugInfo/X86/dwarfdump-rnglists.s
index 45cf65c985f8c..74de6427df717 100644
--- a/llvm/test/DebugInfo/X86/dwarfdump-rnglists.s
+++ b/llvm/test/DebugInfo/X86/dwarfdump-rnglists.s
@@ -1,8 +1,8 @@
 # RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o
 # RUN: not llvm-dwarfdump -v -debug-info %t.o 2> %t.err | FileCheck %s
-# RUN: FileCheck %s --input-file %t.err --check-prefix=ERR
+# RUN: FileCheck %s --input-file %t.err --check-prefix=ERR --implicit-check-not=error
 # RUN: not llvm-dwarfdump -lookup 10 %t.o 2> %t2.err
-# RUN: FileCheck %s --input-file %t2.err --check-prefix=ERR
+# RUN: FileCheck %s --input-file %t2.err --check-prefix=ERR --implicit-check-not=error
 
 # Test object to verify dwarfdump handles v5 range lists.
 # We use very simplified compile unit dies.
@@ -203,6 +203,4 @@ Range1_end:
 # CHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000015
 # CHECK-NEXT: [0x0000002a, 0x00000034))
 
-#ERR: error: parsing a range list table: did not detect a valid list table with base = 0x8
-#ERR: error: decoding address ranges: missing or invalid range list table
 #ERR: error: decoding address ranges: invalid range list offset 0xfa0
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s b/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s
index 3465d08bf261e..7b4ff70e5ff58 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s
@@ -1,7 +1,5 @@
 # RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o %t.o
-# RUN: not llvm-dwarfdump -v -debug-info -debug-line -debug-addr -debug-rnglists -debug-ranges %t.o | FileCheck --implicit-check-not=DW_TAG --implicit-check-not=DW_AT %s
-
-# FIXME: Remove the 'not' once the rnglist are lazily/correctly parsed (see comment below)
+# RUN: llvm-dwarfdump -v -debug-info -debug-line -debug-addr -debug-rnglists -debug-ranges %t.o | FileCheck --implicit-check-not=DW_TAG --implicit-check-not=DW_AT %s
 
 # Test that llvm-dwarfdump strips addresses relating to dead code (using the
 # DWARFv6-proposed tombstone constant & nearest equivalent for debug_ranges)
@@ -45,17 +43,14 @@
 
 # CHECK: DW_TAG_compile_unit
 # CHECK:   DW_AT_addr_base
-# FIXME: Lazily parse rnglists rather than expecting to be able to parse an
-# entire rnglists contribution (since there's no way to know where such a
-# contribution starts) - rather than assuming one starts at 0.
 # CHECK:   DW_AT_ranges
-# [0x0000000000000042, 0x0000000000000048)
-# [0x0000000000000042, 0x0000000000000048)
-# [0x0000000000000042, 0x0000000000000048)
-# [0x0000000000000042, 0x0000000000000042)
-# [0x0000000000000042, 0x0000000000000048)
-# [0x0000000000000042, 0x0000000000000048))
+# CHECK-NEXT: [0x0000000000000042, 0x0000000000000048)
+# CHECK-NEXT: [0x0000000000000042, 0x0000000000000048)
+# CHECK-NEXT: [0x0000000000000042, 0x0000000000000048)
+# CHECK-NEXT: [0x0000000000000042, 0x0000000000000042)
+# CHECK-NEXT: [0x0000000000000042, 0x0000000000000048)
+# CHECK-NEXT: [0x0000000000000042, 0x0000000000000048))
 # CHECK: DW_TAG_subprogram
 # CHECK:   DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0xffffffffffffffff (dead code))
 # CHECK:   DW_AT_high_pc [DW_FORM_data4] (0x00000006)

From b18026114ab1410d531559ad6d9f1b445b98a35f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 4 Oct 2020 20:34:31 -0700
Subject: [PATCH 002/321] [X86] MWAITX_SAVE_RBX should not have EBX as an
 implicit use.

RBX was copied to a virtual register before this instruction was created.
And the EBX input for the final MWAITX is still in a virtual register. So EBX isn't read by this pseudo. --- llvm/lib/Target/X86/X86InstrCompiler.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index c20a2b88e1187..07e7b17230e1b 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -899,7 +899,7 @@ def LCMPXCHG16B_SAVE_RBX : // This pseudo must be used when the frame uses RBX/EBX as // the base pointer. // cf comment for LCMPXCHG16B_SAVE_RBX. -let Defs = [EBX], Uses = [ECX, EAX, EBX], +let Defs = [EBX], Uses = [ECX, EAX], Predicates = [HasMWAITX], SchedRW = [WriteSystem], isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst" in { def MWAITX_SAVE_RBX : From e8beb6988bab71ee4917288f07674b4982736109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sun, 4 Oct 2020 23:03:46 -0700 Subject: [PATCH 003/321] [lldb] [test/Register] Attempt to fix x86-fp-read.test on Darwin Darwin seems to use stmmN instead of stN. Use a regex to accept both. Also try to actually clear st(7). Differential revision: https://reviews.llvm.org/D88795 --- .../Shell/Register/Inputs/x86-fp-read.cpp | 6 ++++- lldb/test/Shell/Register/x86-fp-read.test | 23 +++++++++---------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/lldb/test/Shell/Register/Inputs/x86-fp-read.cpp b/lldb/test/Shell/Register/Inputs/x86-fp-read.cpp index 1bd2d60affa2e..63971e20b62fc 100644 --- a/lldb/test/Shell/Register/Inputs/x86-fp-read.cpp +++ b/lldb/test/Shell/Register/Inputs/x86-fp-read.cpp @@ -14,7 +14,8 @@ int main() { {0x8000000000000000, 0x7fff}, // +inf {0x8000000000000000, 0xffff}, // -inf {0xc000000000000000, 0xffff}, // nan - // leave st7 empty to test tag word better + // st7 will be freed to test tag word better + {0x0000000000000000, 0x0000}, // +0 }; // unmask divide-by-zero exception @@ -26,6 +27,7 @@ int main() { "finit\n\t" "fldcw %1\n\t" // load on stack in reverse order to make the result easier to read + "fldt 0x70(%0)\n\t" "fldt 0x60(%0)\n\t" "fldt 0x50(%0)\n\t" "fldt 0x40(%0)\n\t" @@ -33,6 +35,8 @@ int main() { "fldt 0x20(%0)\n\t" "fldt 0x10(%0)\n\t" "fldt 0x00(%0)\n\t" + // free st7 + "ffree %%st(7)\n\t" // this should trigger a divide-by-zero "fdivs (%2)\n\t" "int3\n\t" diff --git a/lldb/test/Shell/Register/x86-fp-read.test b/lldb/test/Shell/Register/x86-fp-read.test index eac942f5989cf..f0c35c726c7cd 100644 --- a/lldb/test/Shell/Register/x86-fp-read.test +++ b/lldb/test/Shell/Register/x86-fp-read.test @@ -1,5 +1,4 @@ # XFAIL: system-windows -# XFAIL: system-darwin # REQUIRES: native && (target-x86 || target-x86_64) # RUN: %clangxx_host -g %p/Inputs/x86-fp-read.cpp -o %t # RUN: %lldb -b -s %s %t | FileCheck %s @@ -8,20 +7,20 @@ process launch register read --all # CHECK-DAG: fctrl = 0x037b -# CHECK-DAG: fstat = 0x8884 +# CHECK-DAG: fstat = 0x8084 # TODO: the following value is incorrect, it's a bug in the way -# FXSAVE/XSAVE is interpreted; it should be 0xa963 once fixed -# CHECK-DAG: ftag = 0x00fe +# FXSAVE/XSAVE is interpreted +# CHECK-DAG: ftag = 0x007f # CHECK-DAG: fop = 0x0033 -# CHECK-DAG: st0 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0x00 0x40} -# CHECK-DAG: st1 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x3f 0x00 0x00} -# CHECK-DAG: st2 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00} -# CHECK-DAG: st3 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80} -# CHECK-DAG: st4 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 
0x7f}
-# CHECK-DAG: st5 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0xff}
-# CHECK-DAG: st6 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xc0 0xff 0xff}
-# CHECK-DAG: st7 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}
+# CHECK-DAG: st{{(mm)?}}0 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0x00 0x40}
+# CHECK-DAG: st{{(mm)?}}1 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x3f 0x00 0x00}
+# CHECK-DAG: st{{(mm)?}}2 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}
+# CHECK-DAG: st{{(mm)?}}3 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80}
+# CHECK-DAG: st{{(mm)?}}4 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0x7f}
+# CHECK-DAG: st{{(mm)?}}5 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0xff}
+# CHECK-DAG: st{{(mm)?}}6 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xc0 0xff 0xff}
+# CHECK-DAG: st{{(mm)?}}7 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}
 
 # fdiv (%rbx) gets encoded into 2 bytes, int3 into 1 byte
 print (void*)($pc-3)

From a58b20e5a4fb64404cb62d2bb6a5e6dc40d22784 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere
Date: Sun, 4 Oct 2020 23:46:26 -0700
Subject: [PATCH 004/321] [llvm] Rename DwarfFile to DWARFFile to fix ODR
 violation (NFC)

Rename the DwarfFile class in DWARFLinker to DWARFFile. This is
consistent with the other DWARF classes and avoids an ODR violation with
the DwarfFile class in AsmPrinter.
---
 llvm/include/llvm/DWARFLinker/DWARFLinker.h  | 60 ++++++++++----------
 llvm/lib/DWARFLinker/DWARFLinker.cpp         | 40 ++++++-------
 llvm/tools/dsymutil/DwarfLinkerForBinary.cpp |  8 +--
 llvm/tools/dsymutil/DwarfLinkerForBinary.h   |  4 +-
 4 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
index be3c5ebcadaeb..2fb61b9edf559 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
@@ -202,9 +202,9 @@ using UnitListTy = std::vector<std::unique_ptr<CompileUnit>>;
 
 /// this class represents DWARF information for source file
 /// and it`s address map.
-class DwarfFile {
+class DWARFFile {
 public:
-  DwarfFile(StringRef Name, DWARFContext *Dwarf, AddressesMap *Addresses,
+  DWARFFile(StringRef Name, DWARFContext *Dwarf, AddressesMap *Addresses,
             const std::vector<std::string> &Warnings)
       : FileName(Name), Dwarf(Dwarf), Addresses(Addresses), Warnings(Warnings) {
   }
@@ -222,7 +222,7 @@ class DWARFFile {
 
 typedef std::function<void(const Twine &Warning, StringRef Context,
                            const DWARFDie *DIE)>
     messageHandler;
-typedef std::function<ErrorOr<DwarfFile &>(StringRef ContainerName,
+typedef std::function<ErrorOr<DWARFFile &>(StringRef ContainerName,
                                            StringRef Path)>
     objFileLoader;
 typedef std::map<std::string, std::string> swiftInterfacesMap;
@@ -249,7 +249,7 @@ class DWARFLinker {
       : TheDwarfEmitter(Emitter), DwarfLinkerClientID(ClientID) {}
 
   /// Add object file to be linked.
-  void addObjectFile(DwarfFile &File);
+  void addObjectFile(DWARFFile &File);
 
   /// Link debug info for added objFiles. Object
   /// files are linked all together.
@@ -376,13 +376,13 @@ class DWARFLinker {
   /// returns true if we need to translate strings.
  bool needToTranslateStrings() { return StringsTranslator != nullptr; }
 
-  void reportWarning(const Twine &Warning, const DwarfFile &File,
+  void reportWarning(const Twine &Warning, const DWARFFile &File,
                      const DWARFDie *DIE = nullptr) const {
     if (Options.WarningHandler != nullptr)
       Options.WarningHandler(Warning, File.FileName, DIE);
   }
 
-  void reportError(const Twine &Warning, const DwarfFile &File,
+  void reportError(const Twine &Warning, const DWARFFile &File,
                    const DWARFDie *DIE = nullptr) const {
     if (Options.ErrorHandler != nullptr)
       Options.ErrorHandler(Warning, File.FileName, DIE);
@@ -398,18 +398,18 @@ class DWARFLinker {
   void updateAccelKind(DWARFContext &Dwarf);
 
   /// Emit warnings as Dwarf compile units to leave a trail after linking.
-  bool emitPaperTrailWarnings(const DwarfFile &File,
+  bool emitPaperTrailWarnings(const DWARFFile &File,
                               OffsetsStringPool &StringPool);
 
   void copyInvariantDebugSection(DWARFContext &Dwarf);
 
   /// Keeps track of data associated with one object during linking.
   struct LinkContext {
-    DwarfFile &File;
+    DWARFFile &File;
     UnitListTy CompileUnits;
     bool Skip = false;
 
-    LinkContext(DwarfFile &File) : File(File) {}
+    LinkContext(DWARFFile &File) : File(File) {}
 
     /// Clear part of the context that's no longer needed when we're done with
     /// the debug object.
@@ -438,7 +438,7 @@ class DWARFLinker {
   /// kept. All DIEs referenced though attributes should be kept.
   void lookForRefDIEsToKeep(const DWARFDie &Die, CompileUnit &CU,
                             unsigned Flags, const UnitListTy &Units,
-                            const DwarfFile &File,
+                            const DWARFFile &File,
                             SmallVectorImpl<WorklistItem> &Worklist);
 
   /// \defgroup FindRootDIEs Find DIEs corresponding to Address map entries.
@@ -450,7 +450,7 @@ class DWARFLinker {
   /// The return value indicates whether the DIE is incomplete.
   void lookForDIEsToKeep(AddressesMap &RelocMgr, RangesTy &Ranges,
                          const UnitListTy &Units, const DWARFDie &DIE,
-                         const DwarfFile &File, CompileUnit &CU,
+                         const DWARFFile &File, CompileUnit &CU,
                          unsigned Flags);
 
   /// If this compile unit is really a skeleton CU that points to a
@@ -460,7 +460,7 @@ class DWARFLinker {
   /// pointing to the module, and a DW_AT_gnu_dwo_id with the module
   /// hash.
   bool registerModuleReference(DWARFDie CUDie, const DWARFUnit &Unit,
-                               const DwarfFile &File,
+                               const DWARFFile &File,
                                OffsetsStringPool &OffsetsStringPool,
                                UniquingStringPool &UniquingStringPoolStringPool,
                                DeclContextTree &ODRContexts,
@@ -473,7 +473,7 @@ class DWARFLinker {
   /// to Units.
   Error loadClangModule(DWARFDie CUDie, StringRef FilePath,
                         StringRef ModuleName, uint64_t DwoId,
-                        const DwarfFile &File,
+                        const DWARFFile &File,
                         OffsetsStringPool &OffsetsStringPool,
                         UniquingStringPool &UniquingStringPool,
                         DeclContextTree &ODRContexts, uint64_t ModulesEndOffset,
@@ -484,11 +484,11 @@ class DWARFLinker {
   void keepDIEAndDependencies(AddressesMap &RelocMgr, RangesTy &Ranges,
                               const UnitListTy &Units, const DWARFDie &DIE,
                               CompileUnit::DIEInfo &MyInfo,
-                              const DwarfFile &File, CompileUnit &CU,
+                              const DWARFFile &File, CompileUnit &CU,
                               bool UseODR);
 
   unsigned shouldKeepDIE(AddressesMap &RelocMgr, RangesTy &Ranges,
-                         const DWARFDie &DIE, const DwarfFile &File,
+                         const DWARFDie &DIE, const DWARFFile &File,
                          CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo,
                          unsigned Flags);
 
@@ -499,7 +499,7 @@ class DWARFLinker {
                                  CompileUnit::DIEInfo &MyInfo, unsigned Flags);
 
   unsigned shouldKeepSubprogramDIE(AddressesMap &RelocMgr, RangesTy &Ranges,
-                                   const DWARFDie &DIE, const DwarfFile &File,
+                                   const DWARFDie &DIE, const DWARFFile &File,
                                    CompileUnit &Unit,
                                    CompileUnit::DIEInfo &MyInfo,
                                    unsigned Flags);
 
@@ -508,7 +508,7 @@ class DWARFLinker {
   /// RefValue. The resulting DIE might be in another CompileUnit which is
   /// stored into \p ReferencedCU. \returns null if resolving fails for any
   /// reason.
-  DWARFDie resolveDIEReference(const DwarfFile &File, const UnitListTy &Units,
+  DWARFDie resolveDIEReference(const DWARFFile &File, const UnitListTy &Units,
                                const DWARFFormValue &RefValue,
                                const DWARFDie &DIE, CompileUnit *&RefCU);
 
@@ -523,7 +523,7 @@ class DWARFLinker {
   class DIECloner {
     DWARFLinker &Linker;
     DwarfEmitter *Emitter;
-    DwarfFile &ObjFile;
+    DWARFFile &ObjFile;
 
     /// Allocator used for all the DIEValue objects.
     BumpPtrAllocator &DIEAlloc;
@@ -533,7 +533,7 @@ class DWARFLinker {
     bool Update;
 
   public:
-    DIECloner(DWARFLinker &Linker, DwarfEmitter *Emitter, DwarfFile &ObjFile,
+    DIECloner(DWARFLinker &Linker, DwarfEmitter *Emitter, DWARFFile &ObjFile,
               BumpPtrAllocator &DIEAlloc,
               std::vector<std::unique_ptr<CompileUnit>> &CompileUnits,
              bool Update)
@@ -551,7 +551,7 @@ class DWARFLinker {
     /// applied to the entry point of the function to get the linked address.
     /// \param Die the output DIE to use, pass NULL to create one.
     /// \returns the root of the cloned tree or null if nothing was selected.
-    DIE *cloneDIE(const DWARFDie &InputDIE, const DwarfFile &File,
+    DIE *cloneDIE(const DWARFDie &InputDIE, const DWARFFile &File,
                   CompileUnit &U, OffsetsStringPool &StringPool,
                   int64_t PCOffset, uint32_t OutOffset, unsigned Flags,
                   bool IsLittleEndian, DIE *Die = nullptr);
 
@@ -560,7 +560,7 @@ class DWARFLinker {
     /// chose to keep above. If there are no valid relocs, then there's
     /// nothing to clone/emit.
     uint64_t cloneAllCompileUnits(DWARFContext &DwarfContext,
-                                  const DwarfFile &File,
+                                  const DWARFFile &File,
                                   OffsetsStringPool &StringPool,
                                   bool IsLittleEndian);
 
@@ -606,7 +606,7 @@ class DWARFLinker {
 
     /// Helper for cloneDIE.
     unsigned cloneAttribute(DIE &Die, const DWARFDie &InputDIE,
-                            const DwarfFile &File, CompileUnit &U,
+                            const DWARFFile &File, CompileUnit &U,
                             OffsetsStringPool &StringPool,
                             const DWARFFormValue &Val,
                             const AttributeSpec AttrSpec, unsigned AttrSize,
@@ -627,18 +627,18 @@ class DWARFLinker {
                                         AttributeSpec AttrSpec,
                                         unsigned AttrSize,
                                         const DWARFFormValue &Val,
-                                        const DwarfFile &File,
+                                        const DWARFFile &File,
                                         CompileUnit &Unit);
 
     /// Clone a DWARF expression that may be referencing another DIE.
    void cloneExpression(DataExtractor &Data, DWARFExpression Expression,
-                         const DwarfFile &File, CompileUnit &Unit,
+                         const DWARFFile &File, CompileUnit &Unit,
                          SmallVectorImpl<uint8_t> &OutputBuffer);
 
     /// Clone an attribute referencing another DIE and add
     /// it to \p Die.
     /// \returns the size of the new attribute.
-    unsigned cloneBlockAttribute(DIE &Die, const DwarfFile &File,
+    unsigned cloneBlockAttribute(DIE &Die, const DWARFFile &File,
                                  CompileUnit &Unit, AttributeSpec AttrSpec,
                                  const DWARFFormValue &Val, unsigned AttrSize,
                                  bool IsLittleEndian);
 
@@ -654,7 +654,7 @@ class DWARFLinker {
     /// Clone a scalar attribute and add it to \p Die.
     /// \returns the size of the new attribute.
     unsigned cloneScalarAttribute(DIE &Die, const DWARFDie &InputDIE,
-                                  const DwarfFile &File, CompileUnit &U,
+                                  const DWARFFile &File, CompileUnit &U,
                                   AttributeSpec AttrSpec,
                                   const DWARFFormValue &Val, unsigned AttrSize,
                                   AttributesInfo &Info);
 
@@ -670,7 +670,7 @@ class DWARFLinker {
     void copyAbbrev(const DWARFAbbreviationDeclaration &Abbrev, bool hasODR);
 
     uint32_t hashFullyQualifiedName(DWARFDie DIE, CompileUnit &U,
-                                    const DwarfFile &File,
+                                    const DWARFFile &File,
                                     int RecurseDepth = 0);
 
     /// Helper for cloneDIE.
@@ -685,7 +685,7 @@ class DWARFLinker {
   /// Compute and emit debug_ranges section for \p Unit, and
   /// patch the attributes referencing it.
   void patchRangesForUnit(const CompileUnit &Unit, DWARFContext &Dwarf,
-                          const DwarfFile &File) const;
+                          const DWARFFile &File) const;
 
   /// Generate and emit the DW_AT_ranges attribute for a compile_unit if it had
   /// one.
@@ -695,7 +695,7 @@ class DWARFLinker {
   /// parts according to the linked function ranges and emit the result in the
   /// debug_line section.
   void patchLineTableForUnit(CompileUnit &Unit, DWARFContext &OrigDwarf,
-                             const DwarfFile &File);
+                             const DWARFFile &File);
 
   /// Emit the accelerator entries for \p Unit.
   void emitAcceleratorEntriesForUnit(CompileUnit &Unit);
@@ -703,7 +703,7 @@ class DWARFLinker {
   void emitAppleAcceleratorEntriesForUnit(CompileUnit &Unit);
 
   /// Patch the frame info for an object file and emit it.
-  void patchFrameInfoForObject(const DwarfFile &, RangesTy &Ranges,
+  void patchFrameInfoForObject(const DWARFFile &, RangesTy &Ranges,
                                DWARFContext &, unsigned AddressSize);
 
   /// FoldingSet that uniques the abbreviations.
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index 12b19e77a4223..2b12274281055 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -63,7 +63,7 @@ static CompileUnit *getUnitForOffset(const UnitListTy &Units, uint64_t Offset) {
 /// Resolve the DIE attribute reference that has been extracted in \p RefValue.
 /// The resulting DIE might be in another CompileUnit which is stored into \p
 /// ReferencedCU. \returns null if resolving fails for any reason.
-DWARFDie DWARFLinker::resolveDIEReference(const DwarfFile &File,
+DWARFDie DWARFLinker::resolveDIEReference(const DWARFFile &File,
                                           const UnitListTy &Units,
                                           const DWARFFormValue &RefValue,
                                           const DWARFDie &DIE,
@@ -420,7 +420,7 @@ unsigned DWARFLinker::shouldKeepVariableDIE(AddressesMap &RelocMgr,
 /// \returns updated TraversalFlags.
 unsigned DWARFLinker::shouldKeepSubprogramDIE(
     AddressesMap &RelocMgr, RangesTy &Ranges, const DWARFDie &DIE,
-    const DwarfFile &File, CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo,
+    const DWARFFile &File, CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo,
     unsigned Flags) {
   const auto *Abbrev = DIE.getAbbreviationDeclarationPtr();
 
@@ -482,7 +482,7 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE(
 /// Check if a DIE should be kept.
 /// \returns updated TraversalFlags.
 unsigned DWARFLinker::shouldKeepDIE(AddressesMap &RelocMgr, RangesTy &Ranges,
-                                    const DWARFDie &DIE, const DwarfFile &File,
+                                    const DWARFDie &DIE, const DWARFFile &File,
                                     CompileUnit &Unit,
                                     CompileUnit::DIEInfo &MyInfo,
                                     unsigned Flags) {
@@ -590,7 +590,7 @@ void DWARFLinker::lookForChildDIEsToKeep(
 /// kept. All DIEs referenced though attributes should be kept.
 void DWARFLinker::lookForRefDIEsToKeep(
     const DWARFDie &Die, CompileUnit &CU, unsigned Flags,
-    const UnitListTy &Units, const DwarfFile &File,
+    const UnitListTy &Units, const DWARFFile &File,
    SmallVectorImpl<WorklistItem> &Worklist) {
   bool UseOdr = (Flags & DWARFLinker::TF_DependencyWalk)
                     ? (Flags & DWARFLinker::TF_ODR)
@@ -700,7 +700,7 @@ void DWARFLinker::lookForParentDIEsToKeep(
 /// The return value indicates whether the DIE is incomplete.
 void DWARFLinker::lookForDIEsToKeep(AddressesMap &AddressesMap,
                                     RangesTy &Ranges, const UnitListTy &Units,
-                                    const DWARFDie &Die, const DwarfFile &File,
+                                    const DWARFDie &Die, const DWARFFile &File,
                                     CompileUnit &Cu, unsigned Flags) {
   // LIFO work list.
   SmallVector<WorklistItem, 4> Worklist;
@@ -838,7 +838,7 @@ unsigned DWARFLinker::DIECloner::cloneStringAttribute(
 
 unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
     DIE &Die, const DWARFDie &InputDIE, AttributeSpec AttrSpec,
-    unsigned AttrSize, const DWARFFormValue &Val, const DwarfFile &File,
+    unsigned AttrSize, const DWARFFormValue &Val, const DWARFFile &File,
     CompileUnit &Unit) {
   const DWARFUnit &U = Unit.getOrigUnit();
   uint64_t Ref = *Val.getAsReference();
@@ -910,7 +910,7 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
 }
 
 void DWARFLinker::DIECloner::cloneExpression(
-    DataExtractor &Data, DWARFExpression Expression, const DwarfFile &File,
+    DataExtractor &Data, DWARFExpression Expression, const DWARFFile &File,
     CompileUnit &Unit, SmallVectorImpl<uint8_t> &OutputBuffer) {
   using Encoding = DWARFExpression::Operation::Encoding;
 
@@ -975,7 +975,7 @@ void DWARFLinker::DIECloner::cloneExpression(
 }
 
 unsigned DWARFLinker::DIECloner::cloneBlockAttribute(
-    DIE &Die, const DwarfFile &File, CompileUnit &Unit, AttributeSpec AttrSpec,
+    DIE &Die, const DWARFFile &File, CompileUnit &Unit, AttributeSpec AttrSpec,
     const DWARFFormValue &Val, unsigned AttrSize, bool IsLittleEndian) {
   DIEValueList *Attr;
   DIEValue Value;
@@ -1087,7 +1087,7 @@ unsigned DWARFLinker::DIECloner::cloneAddressAttribute(
 }
 
 unsigned DWARFLinker::DIECloner::cloneScalarAttribute(
-    DIE &Die, const DWARFDie &InputDIE, const DwarfFile &File,
+    DIE &Die, const DWARFDie &InputDIE, const DWARFFile &File,
     CompileUnit &Unit, AttributeSpec AttrSpec, const DWARFFormValue &Val,
     unsigned AttrSize, AttributesInfo &Info) {
   uint64_t Value;
@@ -1155,7 +1155,7 @@ unsigned DWARFLinker::DIECloner::cloneScalarAttribute(
 /// value \p Val, and add it to \p Die.
 /// \returns the size of the cloned attribute.
unsigned DWARFLinker::DIECloner::cloneAttribute( - DIE &Die, const DWARFDie &InputDIE, const DwarfFile &File, + DIE &Die, const DWARFDie &InputDIE, const DWARFFile &File, CompileUnit &Unit, OffsetsStringPool &StringPool, const DWARFFormValue &Val, const AttributeSpec AttrSpec, unsigned AttrSize, AttributesInfo &Info, bool IsLittleEndian) { @@ -1273,7 +1273,7 @@ shouldSkipAttribute(DWARFAbbreviationDeclaration::AttributeSpec AttrSpec, } DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE, - const DwarfFile &File, CompileUnit &Unit, + const DWARFFile &File, CompileUnit &Unit, OffsetsStringPool &StringPool, int64_t PCOffset, uint32_t OutOffset, unsigned Flags, bool IsLittleEndian, @@ -1483,7 +1483,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE, /// to point at the new entries. void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit, DWARFContext &OrigDwarf, - const DwarfFile &File) const { + const DWARFFile &File) const { DWARFDebugRangeList RangeList; const auto &FunctionRanges = Unit.getFunctionRanges(); unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize(); @@ -1590,7 +1590,7 @@ static void patchStmtList(DIE &Die, DIEInteger Offset) { /// are present in the binary. void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, DWARFContext &OrigDwarf, - const DwarfFile &File) { + const DWARFFile &File) { DWARFDie CUDie = Unit.getOrigUnit().getUnitDIE(); auto StmtList = dwarf::toSectionOffset(CUDie.find(dwarf::DW_AT_stmt_list)); if (!StmtList) @@ -1790,7 +1790,7 @@ void DWARFLinker::emitDwarfAcceleratorEntriesForUnit(CompileUnit &Unit) { /// This is actually pretty easy as the data of the CIEs and FDEs can /// be considered as black boxes and moved as is. The only thing to do /// is to patch the addresses in the headers. -void DWARFLinker::patchFrameInfoForObject(const DwarfFile &File, +void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File, RangesTy &Ranges, DWARFContext &OrigDwarf, unsigned AddrSize) { @@ -1887,7 +1887,7 @@ void DWARFLinker::DIECloner::copyAbbrev( uint32_t DWARFLinker::DIECloner::hashFullyQualifiedName(DWARFDie DIE, CompileUnit &U, - const DwarfFile &File, + const DWARFFile &File, int ChildRecurseDepth) { const char *Name = nullptr; DWARFUnit *OrigUnit = &U.getOrigUnit(); @@ -1952,7 +1952,7 @@ static std::string remapPath(StringRef Path, } bool DWARFLinker::registerModuleReference( - DWARFDie CUDie, const DWARFUnit &Unit, const DwarfFile &File, + DWARFDie CUDie, const DWARFUnit &Unit, const DWARFFile &File, OffsetsStringPool &StringPool, UniquingStringPool &UniquingStringPool, DeclContextTree &ODRContexts, uint64_t ModulesEndOffset, unsigned &UnitID, bool IsLittleEndian, unsigned Indent, bool Quiet) { @@ -2011,7 +2011,7 @@ bool DWARFLinker::registerModuleReference( Error DWARFLinker::loadClangModule( DWARFDie CUDie, StringRef Filename, StringRef ModuleName, uint64_t DwoId, - const DwarfFile &File, OffsetsStringPool &StringPool, + const DWARFFile &File, OffsetsStringPool &StringPool, UniquingStringPool &UniquingStringPool, DeclContextTree &ODRContexts, uint64_t ModulesEndOffset, unsigned &UnitID, bool IsLittleEndian, unsigned Indent, bool Quiet) { @@ -2096,7 +2096,7 @@ Error DWARFLinker::loadClangModule( } uint64_t DWARFLinker::DIECloner::cloneAllCompileUnits( - DWARFContext &DwarfContext, const DwarfFile &File, + DWARFContext &DwarfContext, const DWARFFile &File, OffsetsStringPool &StringPool, bool IsLittleEndian) { uint64_t OutputDebugInfoSize = Linker.Options.NoOutput ? 
0 : Emitter->getDebugInfoSectionSize();
@@ -2190,7 +2190,7 @@ void DWARFLinker::updateAccelKind(DWARFContext &Dwarf) {
   }
 }
 
-bool DWARFLinker::emitPaperTrailWarnings(const DwarfFile &File,
+bool DWARFLinker::emitPaperTrailWarnings(const DWARFFile &File,
                                          OffsetsStringPool &StringPool) {
 
   if (File.Warnings.empty())
@@ -2267,7 +2267,7 @@ void DWARFLinker::copyInvariantDebugSection(DWARFContext &Dwarf) {
                     "debug_aranges");
 }
 
-void DWARFLinker::addObjectFile(DwarfFile &File) {
+void DWARFLinker::addObjectFile(DWARFFile &File) {
   ObjectContexts.emplace_back(LinkContext(File));
 
   if (ObjectContexts.back().File.Dwarf)
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
index 07237e220c310..3c71567b54bbf 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -262,7 +262,7 @@ static Error emitRemarks(const LinkOptions &Options, StringRef BinaryPath,
   return Error::success();
 }
 
-ErrorOr<DwarfFile &>
+ErrorOr<DWARFFile &>
 DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj,
                                  const DebugMap &DebugMap,
                                  remarks::RemarkLinker &RL) {
@@ -274,7 +274,7 @@ DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj,
   AddressMapForLinking.push_back(
       std::make_unique<AddressManager>(*this, *ErrorOrObj, Obj));
 
-  ObjectsForLinking.push_back(std::make_unique<DwarfFile>(
+  ObjectsForLinking.push_back(std::make_unique<DWARFFile>(
       Obj.getObjectFilename(), ContextForLinking.back().get(),
      AddressMapForLinking.back().get(),
      Obj.empty() ? Obj.getWarnings() : EmptyWarnings));
@@ -334,7 +334,7 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) {
       });
   GeneralLinker.setObjFileLoader(
       [&DebugMap, &RL, this](StringRef ContainerName,
-                             StringRef Path) -> ErrorOr<DwarfFile &> {
+                             StringRef Path) -> ErrorOr<DWARFFile &> {
         auto &Obj = DebugMap.addDebugMapObject(
             Path, sys::TimePoint<std::chrono::seconds>(), MachO::N_OSO);
 
@@ -429,7 +429,7 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) {
     if (auto ErrorOrObj = loadObject(*Obj, Map, RL))
       GeneralLinker.addObjectFile(*ErrorOrObj);
     else {
-      ObjectsForLinking.push_back(std::make_unique<DwarfFile>(
+      ObjectsForLinking.push_back(std::make_unique<DWARFFile>(
          Obj->getObjectFilename(), nullptr, nullptr,
          Obj->empty() ? Obj->getWarnings() : EmptyWarnings));
       GeneralLinker.addObjectFile(*ObjectsForLinking.back());
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.h b/llvm/tools/dsymutil/DwarfLinkerForBinary.h
index 7cabacbb993b7..842b27c70ab41 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.h
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.h
@@ -167,7 +167,7 @@ class DwarfLinkerForBinary {
   /// Attempt to load a debug object from disk.
   ErrorOr<const object::ObjectFile &> loadObject(const DebugMapObject &Obj,
                                                  const Triple &triple);
-  ErrorOr<DwarfFile &> loadObject(const DebugMapObject &Obj,
+  ErrorOr<DWARFFile &> loadObject(const DebugMapObject &Obj,
                                   const DebugMap &DebugMap,
                                   remarks::RemarkLinker &RL);
 
@@ -175,7 +175,7 @@ class DwarfLinkerForBinary {
   BinaryHolder &BinHolder;
   LinkOptions Options;
   std::unique_ptr<DwarfStreamer> Streamer;
-  std::vector<std::unique_ptr<DwarfFile>> ObjectsForLinking;
+  std::vector<std::unique_ptr<DWARFFile>> ObjectsForLinking;
   std::vector<std::unique_ptr<DWARFContext>> ContextForLinking;
   std::vector<std::unique_ptr<AddressManager>> AddressMapForLinking;
   std::vector<std::string> EmptyWarnings;

From cf4aa68388025a731236bc6dbe113ffdfe14c6c2 Mon Sep 17 00:00:00 2001
From: Alexey Baturo
Date: Sun, 4 Oct 2020 16:51:04 +0300
Subject: [PATCH 005/321] [RISCV][ASAN] mark asan as supported for RISCV64 and
 enable tests

[11/11] patch series to port ASAN for riscv64

These changes allow using ASAN on the RISCV64 architecture. The majority
of existing tests pass, with a few exceptions (see below). Tests were run
on QEMU and on a "HiFive Unleashed" board.
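As a rough illustration of what the ported runtime is expected to catch on
riscv64, a hypothetical smoke test (not part of this patch) might look like
the following; the target triple and flags are assumptions that depend on the
local toolchain and sysroot:

```
// Hypothetical example. Assumed build command (adjust for your setup):
//   clang++ --target=riscv64-unknown-linux-gnu -fsanitize=address -g demo.cpp
#include <cstdlib>

int main() {
  char *p = static_cast<char *>(malloc(8));
  p[8] = 1; // one byte past the allocation: ASan reports heap-buffer-overflow
  free(p);
  return 0;
}
```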
Tests run:
```
Asan-riscv64-inline-Test - pass
Asan-riscv64-inline-Noinst-Test - pass
Asan-riscv64-calls-Noinst-Test - pass
Asan-riscv64-calls-Test - pass
```

Lit tests:
```
RISCV64LinuxConfig (282 supported, few failures)
RISCV64LinuxDynamicConfig (289 supported, few failures)
```

Lit failures:
```
TestCases/malloc_context_size.cpp - asan works, but backtrace misses some calls
TestCases/Linux/malloc_delete_mismatch.cpp - asan works, but backtrace misses some calls
TestCases/Linux/static_tls.cpp - "Can't guess glibc version" (under debugging)
TestCases/asan_and_llvm_coverage_test.cpp - missing libclang_rt.profile-riscv64.a
```
These failures are currently under investigation and will be addressed in
subsequent commits.

Depends On D87581

Reviewed By: eugenis, vitalybuka

Differential Revision: https://reviews.llvm.org/D87582
---
 compiler-rt/cmake/config-ix.cmake                     | 2 +-
 compiler-rt/lib/asan/scripts/asan_symbolize.py        | 3 ++-
 compiler-rt/lib/asan/tests/asan_test.cpp              | 7 ++++---
 compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt | 2 +-
 compiler-rt/test/asan/CMakeLists.txt                  | 2 +-
 compiler-rt/test/asan/TestCases/Linux/ptrace.cpp      | 8 ++++++++
 compiler-rt/test/asan/TestCases/Linux/vfork.cpp       | 2 +-
 7 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 1428a514b55a4..23a47f2b85397 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -294,7 +294,7 @@ endif()
 set(ALL_SANITIZER_COMMON_SUPPORTED_ARCH ${X86} ${X86_64} ${PPC64} ${RISCV64}
     ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9})
-set(ALL_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
+set(ALL_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64}
    ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9})
 set(ALL_CRT_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV32} ${RISCV64})
 set(ALL_DFSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py
index 1e78cb1b0e77a..5c4001acf8c65 100755
--- a/compiler-rt/lib/asan/scripts/asan_symbolize.py
+++ b/compiler-rt/lib/asan/scripts/asan_symbolize.py
@@ -49,7 +49,8 @@ def fix_filename(file_name):
 
 def is_valid_arch(s):
   return s in ["i386", "x86_64", "x86_64h", "arm", "armv6", "armv7", "armv7s",
-               "armv7k", "arm64", "powerpc64", "powerpc64le", "s390x", "s390"]
+               "armv7k", "arm64", "powerpc64", "powerpc64le", "s390x", "s390",
+               "riscv64"]
 
 def guess_arch(addr):
   # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
diff --git a/compiler-rt/lib/asan/tests/asan_test.cpp b/compiler-rt/lib/asan/tests/asan_test.cpp
index edc98ed185202..51a527359b499 100644
--- a/compiler-rt/lib/asan/tests/asan_test.cpp
+++ b/compiler-rt/lib/asan/tests/asan_test.cpp
@@ -621,9 +621,9 @@ NOINLINE void SigLongJmpFunc1(sigjmp_buf buf) {
   siglongjmp(buf, 1);
 }
 
-#if !defined(__ANDROID__) && !defined(__arm__) && \
-    !defined(__aarch64__) && !defined(__mips__) && \
-    !defined(__mips64) && !defined(__s390__)
+#if !defined(__ANDROID__) && !defined(__arm__) && !defined(__aarch64__) && \
+    !defined(__mips__) && !defined(__mips64) && !defined(__s390__) && \
+    !defined(__riscv)
 NOINLINE void BuiltinLongJmpFunc1(jmp_buf buf) {
   // create three red zones for these two stack objects.
   int a;
@@ -648,6 +648,7 @@ TEST(AddressSanitizer, BuiltinLongJmpTest) {
 #endif  // !defined(__ANDROID__) && !defined(__arm__) &&
         // !defined(__aarch64__) && !defined(__mips__)
         // !defined(__mips64) && !defined(__s390__)
+        // !defined(__riscv)
 
 TEST(AddressSanitizer, UnderscopeLongJmpTest) {
   static jmp_buf buf;
diff --git a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt
index 3c504022ebe7c..96c845d81cf4c 100644
--- a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt
+++ b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt
@@ -3,7 +3,7 @@ include(CompilerRTCompile)
 clang_compiler_add_cxx_check()
 
 # FIXME: use SANITIZER_COMMON_SUPPORTED_ARCH here
-filter_available_targets(SANITIZER_UNITTEST_SUPPORTED_ARCH x86_64 i386 mips64 mips64el)
+filter_available_targets(SANITIZER_UNITTEST_SUPPORTED_ARCH x86_64 i386 mips64 mips64el riscv64)
 if(APPLE)
   darwin_filter_host_archs(SANITIZER_UNITTEST_SUPPORTED_ARCH SANITIZER_UNITTEST_SUPPORTED_ARCH)
 endif()
diff --git a/compiler-rt/test/asan/CMakeLists.txt b/compiler-rt/test/asan/CMakeLists.txt
index 1c2633eb4597b..855fac4f039f6 100644
--- a/compiler-rt/test/asan/CMakeLists.txt
+++ b/compiler-rt/test/asan/CMakeLists.txt
@@ -14,7 +14,7 @@ if(OS_NAME MATCHES "Windows" AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND
 endif()
 
 macro(get_bits_for_arch arch bits)
-  if (${arch} MATCHES "x86_64|powerpc64|powerpc64le|aarch64|arm64|mips64|mips64el|s390x|sparcv9")
+  if (${arch} MATCHES "x86_64|powerpc64|powerpc64le|aarch64|arm64|mips64|mips64el|s390x|sparcv9|riscv64")
     set(${bits} 64)
   elseif (${arch} MATCHES "i386|arm|mips|mipsel|sparc")
     set(${bits} 32)
diff --git a/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp b/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp
index 255bfe6b782e0..21743cfdd9aff 100644
--- a/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp
+++ b/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp
@@ -66,6 +66,14 @@ typedef _user_fpregs_struct fpregs_struct;
 #define PRINT_REG_PC(__regs) printf ("%lx\n", (unsigned long) (__regs.psw.addr))
 #define PRINT_REG_FP(__fpregs) printf ("%lx\n", (unsigned long) (__fpregs.fpc))
 #define ARCH_IOVEC_FOR_GETREGSET
+
+#elif defined(__riscv) && (__riscv_xlen == 64)
+#include <asm/ptrace.h>
+typedef user_regs_struct regs_struct;
+typedef __riscv_q_ext_state fpregs_struct;
+#define PRINT_REG_PC(__regs) printf("%lx\n", (unsigned long)(__regs.pc))
+#define PRINT_REG_FP(__fpregs) printf("%lx\n", (unsigned long)(__fpregs.fcsr))
+#define ARCH_IOVEC_FOR_GETREGSET
 #endif
 
diff --git a/compiler-rt/test/asan/TestCases/Linux/vfork.cpp b/compiler-rt/test/asan/TestCases/Linux/vfork.cpp
index 31a32dc56cd02..4c0f02c5088e4 100644
--- a/compiler-rt/test/asan/TestCases/Linux/vfork.cpp
+++ b/compiler-rt/test/asan/TestCases/Linux/vfork.cpp
@@ -1,7 +1,7 @@
 // https://github.com/google/sanitizers/issues/925
 // RUN: %clang_asan -O0 %s -o %t && %run %t 2>&1
 
-// REQUIRES: aarch64-target-arch || x86_64-target-arch || i386-target-arch || arm-target-arch
+// REQUIRES: aarch64-target-arch || x86_64-target-arch || i386-target-arch || arm-target-arch || riscv64-target-arch
 
 #include
 #include

From a3caf7f6102dc863425f9714b099af58397f0cd2 Mon Sep 17 00:00:00 2001
From: Markus Lavin
Date: Mon, 5 Oct 2020 09:27:30 +0200
Subject: [PATCH 006/321] [DebugInfo] Improve dbg preservation in LSR.

Use SCEV to salvage additional @llvm.dbg.value intrinsics that have
turned into referencing undef after the transformation (and after
traditional salvageDebugInfo has run).
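To make the offset compensation concrete, here is an illustrative fragment,
not additional patch code: it mirrors what the pass does for the
dbg-preserve-0.ll test added below, where the recorded location %p.addr.05
has SCEV {%p,+,3}, the surviving PHI %lsr.iv has SCEV {(3 + %p),+,3}, and the
constant difference is therefore -3.

```
#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;

// Illustrative only: rebuild the variable location as "PHI - 3" when the
// SCEV difference between the recorded dbg.value operand and the new PHI
// is -3. appendOffset(-3) emits DW_OP_constu 3, DW_OP_minus, matching the
// DIExpression(DW_OP_constu, 3, DW_OP_minus, DW_OP_stack_value) that the
// test below expects.
static DIExpression *compensateByMinus3(DIExpression *Expr) {
  SmallVector<uint64_t, 8> Ops;
  DIExpression::appendOffset(Ops, -3);
  return DIExpression::prependOpcodes(Expr, Ops, /*StackValue=*/true);
}
```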
Before the transformation, compute the SCEV for each @llvm.dbg.value in the
loop body and store it (alongside its current DIExpression). After the
transformation, update those @llvm.dbg.value intrinsics that now reference
undef by comparing their stored SCEVs against the SCEVs of the current
loop-header PHI nodes. A match with a constant offset is allowed; the offset
is compensated for in the DIExpression.

Fixes: PR38815

Differential Revision: https://reviews.llvm.org/D87494
---
 llvm/include/llvm/Analysis/ScalarEvolution.h  | 18 ++---
 .../Transforms/Scalar/LoopStrengthReduce.cpp  | 57 ++++++++++++++
 llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll   |  2 +-
 .../LoopStrengthReduce/dbg-preserve-0.ll      | 74 +++++++++++++++++++
 4 files changed, 141 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 158257a5aa9a1..ac6090a30d2ff 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1117,6 +1117,15 @@ class ScalarEvolution {
       const SCEV *S, const Loop *L,
       SmallPtrSetImpl<const SCEVPredicate *> &Preds);
 
+  /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a
+  /// constant, and None if it isn't.
+  ///
+  /// This is intended to be a cheaper version of getMinusSCEV. We can be
+  /// frugal here since we just bail out of actually constructing and
+  /// canonicalizing an expression in the cases where the result isn't going
+  /// to be a constant.
+  Optional<APInt> computeConstantDifference(const SCEV *LHS, const SCEV *RHS);
+
 private:
   /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
   /// Value is deleted.
@@ -1799,15 +1808,6 @@ class ScalarEvolution {
   bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R,
                       SCEV::NoWrapFlags &Flags);
 
-  /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a
-  /// constant, and None if it isn't.
-  ///
-  /// This is intended to be a cheaper version of getMinusSCEV. We can be
-  /// frugal here since we just bail out of actually constructing and
-  /// canonicalizing an expression in the cases where the result isn't going
-  /// to be a constant.
-  Optional<APInt> computeConstantDifference(const SCEV *LHS, const SCEV *RHS);
-
   /// Drop memoized information computed for S.
   void forgetMemoizedResults(const SCEV *S);
 
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 537838e2bdc19..fdb41f3e4b14b 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -59,6 +59,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -80,6 +81,7 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GlobalValue.h"
@@ -5776,6 +5778,27 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
   if (MSSA)
     MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
 
+  // Debug preservation - record all llvm.dbg.value from the loop as well as
+  // the SCEV of their variable location. Since salvageDebugInfo may change the
+  // DIExpression we need to store the original here as well (i.e. it needs to
+  // be in sync with the SCEV).
+  SmallVector<
+      std::tuple<DbgValueInst *, Type *, const SCEV *, DIExpression *>,
+      32>
+      DbgValues;
+  for (auto &B : L->getBlocks()) {
+    for (auto &I : *B) {
+      if (DbgValueInst *D = dyn_cast<DbgValueInst>(&I)) {
+        auto V = D->getVariableLocation();
+        if (!SE.isSCEVable(V->getType()))
+          continue;
+        auto DS = SE.getSCEV(V);
+        DbgValues.push_back(
+            std::make_tuple(D, V->getType(), DS, D->getExpression()));
+      }
+    }
+  }
+
   // Run the main LSR transformation.
   Changed |=
       LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged();
@@ -5797,6 +5820,40 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
       DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
     }
   }
+  // Debug preservation - go through all recorded llvm.dbg.value and for those
+  // that now have an undef variable location use the recorded SCEV to try and
+  // update it. Compare with SCEV of Phi-nodes of loop header to find a
+  // suitable update candidate. SCEV match with constant offset is allowed and
+  // will be compensated for in the DIExpression.
+  if (Changed) {
+    for (auto &D : DbgValues) {
+      auto DbgValue = std::get<0>(D);
+      auto DbgValueType = std::get<1>(D);
+      auto DbgValueSCEV = std::get<2>(D);
+      auto DbgDIExpr = std::get<3>(D);
+      if (!isa<UndefValue>(DbgValue->getVariableLocation()))
+        continue;
+      for (PHINode &Phi : L->getHeader()->phis()) {
+        if (DbgValueType != Phi.getType())
+          continue;
+        if (!SE.isSCEVable(Phi.getType()))
+          continue;
+        auto PhiSCEV = SE.getSCEV(&Phi);
+        if (Optional<APInt> Offset =
+                SE.computeConstantDifference(DbgValueSCEV, PhiSCEV)) {
+          auto &Ctx = DbgValue->getContext();
+          DbgValue->setOperand(
+              0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(&Phi)));
+          if (Offset.getValue().getSExtValue()) {
+            SmallVector<uint64_t, 8> Ops;
+            DIExpression::appendOffset(Ops, Offset.getValue().getSExtValue());
+            DbgDIExpr = DIExpression::prependOpcodes(DbgDIExpr, Ops, true);
+          }
+          DbgValue->setOperand(2, MetadataAsValue::get(Ctx, DbgDIExpr));
+        }
+      }
+    }
+  }
 
   return Changed;
 }
diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
index 08aecdac5b794..e8f37a370666c 100644
--- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
@@ -33,7 +33,7 @@
 ; ASM: popl %ebx
 ; ASM: [[EPILOGUE]]: # %return
 ; ASM: retl $8
-; ASM: Ltmp10:
+; ASM: Ltmp11:
 ; ASM: .cv_fpo_endproc
 
 ; Note how RvaStart advances 7 bytes to skip the shrink-wrapped portion.
diff --git a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll
new file mode 100644
index 0000000000000..71031aabb95b7
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+; Test that LSR preserves debug-info for induction variables.
+ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define dso_local void @foo(i8* nocapture %p) local_unnamed_addr !dbg !7 { +; CHECK-LABEL: @foo( +entry: + call void @llvm.dbg.value(metadata i8* %p, metadata !13, metadata !DIExpression()), !dbg !16 + call void @llvm.dbg.value(metadata i8 0, metadata !14, metadata !DIExpression()), !dbg !17 + br label %for.body, !dbg !18 + +for.cond.cleanup: ; preds = %for.body + ret void, !dbg !19 + +for.body: ; preds = %entry, %for.body +; CHECK-LABEL: for.body: + %i.06 = phi i8 [ 0, %entry ], [ %inc, %for.body ] + %p.addr.05 = phi i8* [ %p, %entry ], [ %add.ptr, %for.body ] + call void @llvm.dbg.value(metadata i8 %i.06, metadata !14, metadata !DIExpression()), !dbg !17 + call void @llvm.dbg.value(metadata i8* %p.addr.05, metadata !13, metadata !DIExpression()), !dbg !16 +; CHECK-NOT: call void @llvm.dbg.value(metadata i8* undef +; CHECK: call void @llvm.dbg.value(metadata i8* %lsr.iv, metadata ![[MID_p:[0-9]+]], metadata !DIExpression(DW_OP_constu, 3, DW_OP_minus, DW_OP_stack_value)), !dbg !16 + %add.ptr = getelementptr inbounds i8, i8* %p.addr.05, i64 3, !dbg !20 + call void @llvm.dbg.value(metadata i8* %add.ptr, metadata !13, metadata !DIExpression()), !dbg !16 +; CHECK-NOT: call void @llvm.dbg.value(metadata i8* undef +; CHECK: call void @llvm.dbg.value(metadata i8* %lsr.iv, metadata ![[MID_p]], metadata !DIExpression()), !dbg !16 + store i8 %i.06, i8* %add.ptr, align 1, !dbg !23, !tbaa !24 + %inc = add nuw nsw i8 %i.06, 1, !dbg !27 + call void @llvm.dbg.value(metadata i8 %inc, metadata !14, metadata !DIExpression()), !dbg !17 + %exitcond.not = icmp eq i8 %inc, 32, !dbg !28 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !18, !llvm.loop !29 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "lsrdbg.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 12.0.0"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "p", arg: 1, scope: !7, file: !1, line: 2, type: !10) +; CHECK: ![[MID_p]] = !DILocalVariable(name: "p", arg: 1, scope: !7, file: !1, line: 2, type: !10) +!14 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 4, type: !11) +!15 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 3) +!16 = !DILocation(line: 0, scope: !7) +!17 = !DILocation(line: 0, scope: !15) +!18 = !DILocation(line: 4, column: 3, scope: !15) +!19 = !DILocation(line: 8, column: 1, scope: !7) +!20 = !DILocation(line: 5, column: 7, scope: !21) +!21 = distinct !DILexicalBlock(scope: !22, file: !1, line: 4, column: 42) +!22 = distinct !DILexicalBlock(scope: !15, file: !1, line: 4, column: 3) +!23 = 
!DILocation(line: 6, column: 8, scope: !21)
+!24 = !{!25, !25, i64 0}
+!25 = !{!"omnipotent char", !26, i64 0}
+!26 = !{!"Simple C/C++ TBAA"}
+!27 = !DILocation(line: 4, column: 38, scope: !22)
+!28 = !DILocation(line: 4, column: 31, scope: !22)
+!29 = distinct !{!29, !18, !30, !31}
+!30 = !DILocation(line: 7, column: 3, scope: !15)
+!31 = !{!"llvm.loop.unroll.disable"}

From 3423d5c9da812b0076d1cf14e96ce453e35257b6 Mon Sep 17 00:00:00 2001
From: Haojian Wu
Date: Mon, 5 Oct 2020 10:35:29 +0200
Subject: [PATCH 007/321] [AST][RecoveryExpr] Propagate the error-bit from a
 VarDecl's initializer to DeclRefExpr.

The error bit was missing when a DeclRefExpr referred to a VarDecl with a
contains-errors initializer. This could cause various violations in clang
-- the DeclRefExpr would be value-dependent but not contains-errors, so a
member such as `ABC<N> abc;` (see the test below) could produce a
non-error, non-dependent type in a non-template context, which leads to
crashes in constexpr evaluation.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D86048
---
 clang/lib/AST/ComputeDependence.cpp | 8 +++++---
 clang/test/Sema/invalid-member.cpp  | 8 ++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp
index 320025e5fc823..f8dfeed0962ea 100644
--- a/clang/lib/AST/ComputeDependence.cpp
+++ b/clang/lib/AST/ComputeDependence.cpp
@@ -466,10 +466,12 @@ ExprDependence clang::computeDependence(DeclRefExpr *E, const ASTContext &Ctx) {
                     : Var->getType()->isIntegralOrEnumerationType()) &&
         (Var->getType().isConstQualified() ||
          Var->getType()->isReferenceType())) {
-      if (const Expr *Init = Var->getAnyInitializer())
-        if (Init->isValueDependent()) {
+      if (const Expr *Init = Var->getAnyInitializer()) {
+        if (Init->isValueDependent())
           Deps |= ExprDependence::ValueInstantiation;
-        }
+        if (Init->containsErrors())
+          Deps |= ExprDependence::Error;
+      }
     }
 
   // (VD) - FIXME: Missing from the standard:
diff --git a/clang/test/Sema/invalid-member.cpp b/clang/test/Sema/invalid-member.cpp
index 9559ead082f05..57ee187ccf4d5 100644
--- a/clang/test/Sema/invalid-member.cpp
+++ b/clang/test/Sema/invalid-member.cpp
@@ -19,3 +19,11 @@ class Z {
 };
 // Should be able to evaluate sizeof without crashing.
 static_assert(sizeof(Z) == 1, "No valid members");
+
+constexpr int N = undef; // expected-error {{use of undeclared identifier}}
+template <int X>
+class ABC {};
+class T {
+  ABC<N> abc;
+};
+static_assert(sizeof(T) == 1, "No valid members");

From 64b879ae2a8a4a4e541404c19d96d18c4aed810e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gabriel=20Hjort=20=C3=85kerlund?=
Date: Mon, 5 Oct 2020 10:28:50 +0200
Subject: [PATCH 008/321] [TableGen][GlobalISel] add handling of nested
 *_SUBREG

When nesting INSERT_SUBREG and EXTRACT_SUBREG, GlobalISelEmitter would
fail to find the register class of the nested node. This patch fixes
that for registers with subregs.
Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D88487 --- .../GlobalISelEmitter-nested-subregs.td | 66 +++++++++++++++++++ llvm/utils/TableGen/GlobalISelEmitter.cpp | 11 ++++ 2 files changed, 77 insertions(+) create mode 100644 llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td diff --git a/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td b/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td new file mode 100644 index 0000000000000..c48c82aa142ac --- /dev/null +++ b/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td @@ -0,0 +1,66 @@ +// RUN: llvm-tblgen %s -gen-global-isel -optimize-match-table=false -I %p/../../include -I %p/Common -o - | FileCheck %s + +include "llvm/Target/Target.td" +include "GlobalISelEmitterCommon.td" + +let Namespace = "MyTarget" in { + +def lo8 : SubRegIndex<8>; +def hi8 : SubRegIndex<8, 8>; +def lo16 : SubRegIndex<16>; +def hi16 : SubRegIndex<16, 16>; + +def a0bl : Register<"a0bl">; +def a0bh : Register<"a0bh">; +def a0wh : Register<"a0wh">; + +} // Namespace = "MyTarget" + +def a0wl: RegisterWithSubRegs<"a0", [a0bh, a0bl]> { + let SubRegIndices = [hi8, lo8]; + let CoveredBySubRegs = 1; +} + +def a0: RegisterWithSubRegs<"a0", [a0wh, a0wl]> { + let SubRegIndices = [hi16, lo16]; + let CoveredBySubRegs = 1; +} + +def A0b : RegisterClass<"MyTarget", [i8], 8, (add a0bl)>; +def A0w : RegisterClass<"MyTarget", [i16], 16, (add a0wl)>; +def A0 : RegisterClass<"MyTarget", [i32], 32, (add a0)>; + +// CHECK: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_ANYEXT, +// CHECK-NEXT: // MIs[0] dst +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s16, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/MyTarget::A0RegClassID, +// CHECK-NEXT: // MIs[0] src +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s8, +// CHECK-NEXT: // (anyext:{ *:[i16] } i8:{ *:[i8] }:$src) => (EXTRACT_SUBREG:{ *:[i16] } (INSERT_SUBREG:{ *:[i32] } (IMPLICIT_DEF:{ *:[i32] }), A0b:{ *:[i8] }:$src, lo8:{ *:[i32] }), lo16:{ *:[i32] }) +// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, +// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s32, +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/TargetOpcode::IMPLICIT_DEF, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/RegState::Define, +// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2, +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::INSERT_SUBREG, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/RegState::Define, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/0, +// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // src +// CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*Imm*/3, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, MyTarget::A0RegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, MyTarget::A0RegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/2, MyTarget::A0bRegClassID, +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, +// CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst +// CHECK-NEXT: GIR_AddTempSubRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, MyTarget::lo16, +// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, MyTarget::A0wRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, 
/*InsnID*/0, /*Op*/1, MyTarget::A0RegClassID, +def : Pat<(i16 (anyext i8:$src)), + (i16 (EXTRACT_SUBREG + (i32 (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), + A0b:$src, + lo8)), + lo16))>; diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 752ca109d1cb3..ec7d20692096b 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -4951,6 +4951,17 @@ GlobalISelEmitter::inferRegClassFromPattern(TreePatternNode *N) { return None; return getRegClassFromLeaf(RCChild); } + if (InstName == "INSERT_SUBREG") { + TreePatternNode *Child0 = N->getChild(0); + assert(Child0->getNumTypes() == 1 && "Unexpected number of types!"); + const TypeSetByHwMode &VTy = Child0->getExtType(0); + return inferSuperRegisterClassForNode(VTy, Child0, N->getChild(2)); + } + if (InstName == "EXTRACT_SUBREG") { + assert(N->getNumTypes() == 1 && "Unexpected number of types!"); + const TypeSetByHwMode &VTy = N->getExtType(0); + return inferSuperRegisterClass(VTy, N->getChild(1)); + } // Handle destination record types that we can safely infer a register class // from. From 0d5989bb24934802a9e6fcca63848a57a91efcc8 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 25 Sep 2020 15:55:02 +0100 Subject: [PATCH 009/321] [AMDGPU] Split R600 and GCN bfe patterns This is in preparation for making the GCN patterns divergence-aware. NFC. Differential Revision: https://reviews.llvm.org/D88579 --- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 47 ------------------- .../Target/AMDGPU/EvergreenInstructions.td | 45 +++++++++++++++++- llvm/lib/Target/AMDGPU/SIInstructions.td | 45 +++++++++++++++++- 3 files changed, 88 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 23e47c6cc14d3..48b82ce395b9c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -597,53 +597,6 @@ class DwordAddrPat : AMDGPUPat < (vt rc:$addr) >; -// Bitfield extract patterns - -def IMMZeroBasedBitfieldMask : ImmLeaf ; - -def IMMPopCount : SDNodeXFormgetTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N), - MVT::i32); -}]>; - -multiclass BFEPattern { - def : AMDGPUPat < - (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)), - (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask)))) - >; - - // x & ((1 << y) - 1) - def : AMDGPUPat < - (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)), - (UBFE $src, (MOV (i32 0)), $width) - >; - - // x & ~(-1 << y) - def : AMDGPUPat < - (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)), - (UBFE $src, (MOV (i32 0)), $width) - >; - - // x & (-1 >> (bitwidth - y)) - def : AMDGPUPat < - (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))), - (UBFE $src, (MOV (i32 0)), $width) - >; - - // x << (bitwidth - y) >> (bitwidth - y) - def : AMDGPUPat < - (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)), - (UBFE $src, (MOV (i32 0)), $width) - >; - - def : AMDGPUPat < - (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)), - (SBFE $src, (MOV (i32 0)), $width) - >; -} - // fshr pattern class FSHRPattern : AMDGPUPat < (fshr i32:$src0, i32:$src1, i32:$src2), diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index a2782bf8b67d6..cd9c056929c5a 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -30,6 +30,15 
@@ class EGOrCaymanPat <dag pattern, dag result> : AMDGPUPat<pattern, result> {
   let SubtargetPredicate = isEGorCayman;
 }
 
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+  return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+                                   MVT::i32);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Evergreen / Cayman store instructions
 //===----------------------------------------------------------------------===//
@@ -394,7 +403,41 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
   VecALU
 >;
 
-defm : BFEPattern <BFE_UINT_eg, BFE_INT_eg, MOV_IMM_I32>;
+// Bitfield extract patterns
+
+def : AMDGPUPat <
+  (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+  (BFE_UINT_eg $src, $rshift, (MOV_IMM_I32 (i32 (IMMPopCount $mask))))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+  (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+  (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+  (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+  (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+def : AMDGPUPat <
+  (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (BFE_INT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
 
 def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
   [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 817fa0bf3ac7a..d55cf0fc65ec6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2315,7 +2315,50 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
 defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
 // FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
 
-defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
+// Bitfield extract patterns
+
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+  return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+                                   MVT::i32);
+}]>;
+
+def : AMDGPUPat <
+  (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+  (V_BFE_U32 $src, $rshift, (i32 (IMMPopCount $mask)))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+  (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+  (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+  (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+  (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+def : AMDGPUPat <
+  (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (V_BFE_I32 $src, (i32 0), $width)
+>;
 
 // SHA-256 Ma patterns

From 16778b19f2c2756a9e0dd04636fb2c269f684917 Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Fri, 25 Sep 2020 16:07:27 +0100
Subject: [PATCH 010/321] [AMDGPU] Make bfe patterns divergence-aware

This tends to increase code size but more importantly it reduces vgpr
usage, and could avoid costly readfirstlanes if the result needs to be
in an sgpr.
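For reference, the rewrites touched below are all instances of the same
bitfield-extract identities. A minimal self-contained C++ model (not compiler
code -- `ubfe` here is just the textbook semantics of v_bfe_u32 with offset 0)
checking them for every in-range width:

#include <cassert>
#include <cstdint>

// Reference semantics of an unsigned bitfield extract: take `width` bits
// of `src` starting at bit `offset`.
static uint32_t ubfe(uint32_t src, uint32_t offset, uint32_t width) {
  return (src >> offset) & ((1u << width) - 1u);
}

int main() {
  const uint32_t x = 0xDEADBEEFu;
  for (uint32_t w = 1; w < 32; ++w) {
    assert((x & ((1u << w) - 1u)) == ubfe(x, 0, w));        // x & ((1 << y) - 1)
    assert((x & ~(~0u << w)) == ubfe(x, 0, w));             // x & ~(-1 << y)
    assert((x & (~0u >> (32 - w))) == ubfe(x, 0, w));       // x & (-1 >> (32 - y))
    assert(((x << (32 - w)) >> (32 - w)) == ubfe(x, 0, w)); // shl then lshr
  }
  return 0;
}

The change below does not alter these identities; it only gates the V_BFE_*
forms on the operands being divergent, so uniform values can instead use the
scalar shift sequences visible in the updated CHECK lines.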
Differential Revision: https://reviews.llvm.org/D88580
---
 llvm/lib/Target/AMDGPU/SIInstructions.td         | 16 ++++++++++------
 .../test/CodeGen/AMDGPU/amdgpu.private-memory.ll |  5 ++---
 llvm/test/CodeGen/AMDGPU/bfe-patterns.ll         | 10 ++++++----
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll     | 12 +++++++-----
 4 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d55cf0fc65ec6..7cffe615f3b30 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2327,36 +2327,40 @@ def IMMPopCount : SDNodeXForm<imm, [{
   MVT::i32);
 }]>;
 
 def : AMDGPUPat <
-  (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+  (DivergentBinFrag<and> (i32 (srl i32:$src, i32:$rshift)),
+                         IMMZeroBasedBitfieldMask:$mask),
   (V_BFE_U32 $src, $rshift, (i32 (IMMPopCount $mask)))
 >;
 
 // x & ((1 << y) - 1)
 def : AMDGPUPat <
-  (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+  (DivergentBinFrag<and> i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
   (V_BFE_U32 $src, (i32 0), $width)
 >;
 
 // x & ~(-1 << y)
 def : AMDGPUPat <
-  (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+  (DivergentBinFrag<and> i32:$src,
+                         (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
   (V_BFE_U32 $src, (i32 0), $width)
 >;
 
 // x & (-1 >> (bitwidth - y))
 def : AMDGPUPat <
-  (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+  (DivergentBinFrag<and> i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
   (V_BFE_U32 $src, (i32 0), $width)
 >;
 
 // x << (bitwidth - y) >> (bitwidth - y)
 def : AMDGPUPat <
-  (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (DivergentBinFrag<srl> (shl_oneuse i32:$src, (sub 32, i32:$width)),
+                         (sub 32, i32:$width)),
   (V_BFE_U32 $src, (i32 0), $width)
 >;
 
 def : AMDGPUPat <
-  (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (DivergentBinFrag<sra> (shl_oneuse i32:$src, (sub 32, i32:$width)),
+                         (sub 32, i32:$width)),
   (V_BFE_I32 $src, (i32 0), $width)
 >;
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
index 072a76780447e..cbfd6415979d7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
@@ -232,10 +232,9 @@ for.end:
 ; SI-ALLOCA: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0
 
 ; SI-PROMOTE-VECT: s_load_dword [[IDX:s[0-9]+]]
-; SI-PROMOTE-VECT: s_mov_b32 [[SREG:s[0-9]+]], 0x10000
 ; SI-PROMOTE-VECT: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 4
-; SI-PROMOTE-VECT: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SCALED_IDX]]
-; SI-PROMOTE-VECT: v_bfe_u32 v{{[0-9]+}}, [[SREG]], [[VREG]], 16
+; SI-PROMOTE-VECT: s_lshr_b32 [[SREG:s[0-9]+]], 0x10000, [[SCALED_IDX]]
+; SI-PROMOTE-VECT: s_and_b32 s{{[0-9]+}}, [[SREG]], 0xffff
 define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
 entry:
   %0 = alloca [2 x i16], addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
index 69237cfabb85b..ce6340fb3953c 100644
--- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
@@ -49,8 +49,9 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
 ; GCN-LABEL: {{^}}s_ubfe_sub_i32:
 ; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
-; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]]
-; GCN: v_bfe_u32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]]
+; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
+; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]] +; GCN: s_lshr_b32 s{{[0-9]+}}, [[TMP]], [[SUB]] define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x @@ -125,8 +126,9 @@ define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, ; GCN-LABEL: {{^}}s_sbfe_sub_i32: ; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}} -; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]] -; GCN: v_bfe_i32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]] +; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]] +; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]] +; GCN: s_ashr_i32 s{{[0-9]+}}, [[TMP]], [[SUB]] define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll index 5ba8edb2c1c04..805ca6f5d8aff 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll @@ -1622,10 +1622,11 @@ define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_mov_b32_e32 v0, s3 -; SI-NEXT: v_bfe_u32 v0, s2, v0, 3 +; SI-NEXT: s_lshr_b32 s0, s2, s3 +; SI-NEXT: s_and_b32 s0, s0, 7 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -1636,8 +1637,9 @@ define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v0, s1 -; VI-NEXT: v_bfe_u32 v0, s0, v0, 3 +; VI-NEXT: s_lshr_b32 s0, s0, s1 +; VI-NEXT: s_and_b32 s0, s0, 7 +; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm %c = lshr i32 %a, %b From 6e2b267d1c85ce0de0e91eb446831607896a0f2b Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 30 Sep 2020 19:19:04 +0200 Subject: [PATCH 011/321] Promote transpose from linalg to standard dialect While affine maps are part of the builtin memref type, there is very limited support for manipulating them in the standard dialect. Add transpose to the set of ops to complement the existing view/subview ops. This is a metadata transformation that encodes the transpose into the strides of a memref. I'm planning to use this when lowering operations on strided memrefs, using the transpose to remove the stride without adding a dependency on linalg dialect. 
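Since the commit leans on transpose being metadata-only, a brief
self-contained C++ sketch of the idea (illustrative only -- `StridedDesc` is
an invented stand-in for the memref descriptor, not MLIR's API): transposing
permutes the descriptor's sizes and strides, and the buffer itself is never
touched.

#include <array>
#include <cstdint>
#include <cstdio>
#include <utility>

// Invented stand-in for a rank-2 strided memref descriptor.
struct StridedDesc {
  float *data;
  int64_t offset;
  std::array<int64_t, 2> sizes, strides;
  float &at(int64_t i, int64_t j) {
    return data[offset + i * strides[0] + j * strides[1]];
  }
};

// (i, j) -> (j, i): permute sizes and strides; no data movement.
static StridedDesc transpose2d(StridedDesc m) {
  std::swap(m.sizes[0], m.sizes[1]);
  std::swap(m.strides[0], m.strides[1]);
  return m;
}

int main() {
  float buf[6] = {0, 1, 2, 3, 4, 5};    // a 2x3 row-major matrix
  StridedDesc m{buf, 0, {2, 3}, {3, 1}};
  StridedDesc t = transpose2d(m);       // views buf as 3x2 with strides {1, 3}
  std::printf("%g %g\n", m.at(1, 2), t.at(2, 1)); // both read buf[5] -> 5 5
}

The TransposeOpLowering added to StandardToLLVM below performs exactly this
permutation on the LLVM memref descriptor.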
Differential Revision: https://reviews.llvm.org/D88651 --- mlir/docs/Dialects/Linalg.md | 2 +- .../mlir/Dialect/Linalg/IR/LinalgOps.td | 30 ------- .../mlir/Dialect/StandardOps/IR/Ops.td | 32 +++++++ .../Conversion/LinalgToLLVM/LinalgToLLVM.cpp | 53 +---------- .../LinalgToStandard/LinalgToStandard.cpp | 10 +-- .../StandardToLLVM/StandardToLLVM.cpp | 52 +++++++++++ mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 85 ------------------ mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 90 +++++++++++++++++++ mlir/lib/Dialect/Vector/VectorOps.cpp | 28 +++--- .../StandardToLLVM/standard-to-llvm.mlir | 17 ++++ mlir/test/Dialect/Linalg/invalid.mlir | 21 ----- mlir/test/Dialect/Linalg/llvm.mlir | 16 ---- mlir/test/Dialect/Linalg/roundtrip.mlir | 4 +- mlir/test/Dialect/Linalg/standard.mlir | 4 +- mlir/test/Dialect/Standard/invalid.mlir | 21 +++++ 15 files changed, 237 insertions(+), 228 deletions(-) diff --git a/mlir/docs/Dialects/Linalg.md b/mlir/docs/Dialects/Linalg.md index 140197b168157..c6681a93e53ea 100644 --- a/mlir/docs/Dialects/Linalg.md +++ b/mlir/docs/Dialects/Linalg.md @@ -554,9 +554,9 @@ are: * `std.view`, * `std.subview`, + * `std.transpose`. * `linalg.range`, * `linalg.slice`, - * `linalg.transpose`. * `linalg.reshape`, Future ops are added on a per-need basis but should include: diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td index d74e59145705e..5b29154e0a03c 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -287,36 +287,6 @@ def Linalg_SliceOp : Linalg_Op<"slice", [ let hasFolder = 1; } -def Linalg_TransposeOp : Linalg_Op<"transpose", [NoSideEffect]>, - Arguments<(ins AnyStridedMemRef:$view, AffineMapAttr:$permutation)>, - Results<(outs AnyStridedMemRef)> { - let summary = "`transpose` produces a new strided memref (metadata-only)"; - let description = [{ - The `linalg.transpose` op produces a strided memref whose sizes and strides - are a permutation of the original `view`. This is a pure metadata - transformation. 
- - Example: - - ```mlir - %1 = linalg.transpose %0 (i, j) -> (j, i) : memref to memref - ``` - }]; - - let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, Value view, " - "AffineMapAttr permutation, ArrayRef attrs = {}">]; - - let verifier = [{ return ::verify(*this); }]; - - let extraClassDeclaration = [{ - static StringRef getPermutationAttrName() { return "permutation"; } - ShapedType getShapedType() { return view().getType().cast(); } - }]; - - let hasFolder = 1; -} - def Linalg_YieldOp : Linalg_Op<"yield", [NoSideEffect, Terminator]>, Arguments<(ins Variadic:$values)> { let summary = "Linalg yield operation"; diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index c62be7571aad7..4a014cb7060c3 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -3428,6 +3428,38 @@ def TensorStoreOp : Std_Op<"tensor_store", let assemblyFormat = "$tensor `,` $memref attr-dict `:` type($memref)"; } +//===----------------------------------------------------------------------===// +// TransposeOp +//===----------------------------------------------------------------------===// + +def TransposeOp : Std_Op<"transpose", [NoSideEffect]>, + Arguments<(ins AnyStridedMemRef:$in, AffineMapAttr:$permutation)>, + Results<(outs AnyStridedMemRef)> { + let summary = "`transpose` produces a new strided memref (metadata-only)"; + let description = [{ + The `transpose` op produces a strided memref whose sizes and strides + are a permutation of the original `in` memref. This is purely a metadata + transformation. + + Example: + + ```mlir + %1 = transpose %0 (i, j) -> (j, i) : memref to memref (d1 * s0 + d0)>> + ``` + }]; + + let builders = [OpBuilder< + "OpBuilder &b, OperationState &result, Value in, " + "AffineMapAttr permutation, ArrayRef attrs = {}">]; + + let extraClassDeclaration = [{ + static StringRef getPermutationAttrName() { return "permutation"; } + ShapedType getShapedType() { return in().getType().cast(); } + }]; + + let hasFolder = 1; +} + //===----------------------------------------------------------------------===// // TruncateIOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp index f38eabb9465d5..4f83297ee0312 100644 --- a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp +++ b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp @@ -284,57 +284,6 @@ class SliceOpConversion : public ConvertToLLVMPattern { } }; -/// Conversion pattern that transforms a linalg.transpose op into: -/// 1. A function entry `alloca` operation to allocate a ViewDescriptor. -/// 2. A load of the ViewDescriptor from the pointer allocated in 1. -/// 3. Updates to the ViewDescriptor to introduce the data ptr, offset, size -/// and stride. Size and stride are permutations of the original values. -/// 4. A store of the resulting ViewDescriptor to the alloca'ed pointer. -/// The linalg.transpose op is replaced by the alloca'ed pointer. 
-class TransposeOpConversion : public ConvertToLLVMPattern { -public: - explicit TransposeOpConversion(MLIRContext *context, - LLVMTypeConverter &lowering_) - : ConvertToLLVMPattern(TransposeOp::getOperationName(), context, - lowering_) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - // Initialize the common boilerplate and alloca at the top of the FuncOp. - edsc::ScopedContext context(rewriter, op->getLoc()); - TransposeOpAdaptor adaptor(operands); - BaseViewConversionHelper baseDesc(adaptor.view()); - - auto transposeOp = cast(op); - // No permutation, early exit. - if (transposeOp.permutation().isIdentity()) - return rewriter.replaceOp(op, {baseDesc}), success(); - - BaseViewConversionHelper desc( - typeConverter.convertType(transposeOp.getShapedType())); - - // Copy the base and aligned pointers from the old descriptor to the new - // one. - desc.setAllocatedPtr(baseDesc.allocatedPtr()); - desc.setAlignedPtr(baseDesc.alignedPtr()); - - // Copy the offset pointer from the old descriptor to the new one. - desc.setOffset(baseDesc.offset()); - - // Iterate over the dimensions and apply size/stride permutation. - for (auto en : llvm::enumerate(transposeOp.permutation().getResults())) { - int sourcePos = en.index(); - int targetPos = en.value().cast().getPosition(); - desc.setSize(targetPos, baseDesc.size(sourcePos)); - desc.setStride(targetPos, baseDesc.stride(sourcePos)); - } - - rewriter.replaceOp(op, {desc}); - return success(); - } -}; - // YieldOp produces and LLVM::ReturnOp. class YieldOpConversion : public ConvertToLLVMPattern { public: @@ -356,7 +305,7 @@ void mlir::populateLinalgToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, MLIRContext *ctx) { patterns.insert(ctx, converter); + YieldOpConversion>(ctx, converter); // Populate the type conversions for the linalg types. converter.addConversion( diff --git a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp index 29b5f9cc996eb..ffb56138a7958 100644 --- a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp +++ b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp @@ -206,12 +206,12 @@ class CopyTransposeConversion : public OpRewritePattern { // If either inputPerm or outputPerm are non-identities, insert transposes. auto inputPerm = op.inputPermutation(); if (inputPerm.hasValue() && !inputPerm->isIdentity()) - in = rewriter.create(op.getLoc(), in, - AffineMapAttr::get(*inputPerm)); + in = rewriter.create(op.getLoc(), in, + AffineMapAttr::get(*inputPerm)); auto outputPerm = op.outputPermutation(); if (outputPerm.hasValue() && !outputPerm->isIdentity()) - out = rewriter.create( - op.getLoc(), out, AffineMapAttr::get(*outputPerm)); + out = rewriter.create(op.getLoc(), out, + AffineMapAttr::get(*outputPerm)); // If nothing was transposed, fail and let the conversion kick in. 
if (in == op.input() && out == op.output()) @@ -270,7 +270,7 @@ void ConvertLinalgToStandardPass::runOnOperation() { ConversionTarget target(getContext()); target.addLegalDialect(); target.addLegalOp(); - target.addLegalOp(); + target.addLegalOp(); OwningRewritePatternList patterns; populateLinalgToStandardConversionPatterns(patterns, &getContext()); if (failed(applyFullConversion(module, target, patterns))) diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 37d0c940aa267..731eab0c28dfc 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -3011,6 +3011,57 @@ struct SubViewOpLowering : public ConvertOpToLLVMPattern { } }; +/// Conversion pattern that transforms a transpose op into: +/// 1. A function entry `alloca` operation to allocate a ViewDescriptor. +/// 2. A load of the ViewDescriptor from the pointer allocated in 1. +/// 3. Updates to the ViewDescriptor to introduce the data ptr, offset, size +/// and stride. Size and stride are permutations of the original values. +/// 4. A store of the resulting ViewDescriptor to the alloca'ed pointer. +/// The transpose op is replaced by the alloca'ed pointer. +class TransposeOpLowering : public ConvertOpToLLVMPattern { +public: + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + auto loc = op->getLoc(); + TransposeOpAdaptor adaptor(operands); + MemRefDescriptor viewMemRef(adaptor.in()); + + auto transposeOp = cast(op); + // No permutation, early exit. + if (transposeOp.permutation().isIdentity()) + return rewriter.replaceOp(op, {viewMemRef}), success(); + + auto targetMemRef = MemRefDescriptor::undef( + rewriter, loc, typeConverter.convertType(transposeOp.getShapedType())); + + // Copy the base and aligned pointers from the old descriptor to the new + // one. + targetMemRef.setAllocatedPtr(rewriter, loc, + viewMemRef.allocatedPtr(rewriter, loc)); + targetMemRef.setAlignedPtr(rewriter, loc, + viewMemRef.alignedPtr(rewriter, loc)); + + // Copy the offset pointer from the old descriptor to the new one. + targetMemRef.setOffset(rewriter, loc, viewMemRef.offset(rewriter, loc)); + + // Iterate over the dimensions and apply size/stride permutation. + for (auto en : llvm::enumerate(transposeOp.permutation().getResults())) { + int sourcePos = en.index(); + int targetPos = en.value().cast().getPosition(); + targetMemRef.setSize(rewriter, loc, targetPos, + viewMemRef.size(rewriter, loc, sourcePos)); + targetMemRef.setStride(rewriter, loc, targetPos, + viewMemRef.stride(rewriter, loc, sourcePos)); + } + + rewriter.replaceOp(op, {targetMemRef}); + return success(); + } +}; + /// Conversion pattern that transforms an op into: /// 1. An `llvm.mlir.undef` operation to create a memref descriptor /// 2. 
Updates to the descriptor to introduce the data ptr, offset, size @@ -3425,6 +3476,7 @@ void mlir::populateStdToLLVMMemoryConversionPatterns( RankOpLowering, StoreOpLowering, SubViewOpLowering, + TransposeOpLowering, ViewOpLowering, AllocOpLowering>(converter); // clang-format on diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index ca2260836d9f3..e9cdb3391f4a2 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -973,86 +973,6 @@ static LogicalResult verify(SliceOp op) { Value SliceOp::getViewSource() { return view(); } -//===----------------------------------------------------------------------===// -// TransposeOp -//===----------------------------------------------------------------------===// - -static MemRefType inferTransposeResultType(MemRefType memRefType, - AffineMap permutationMap) { - auto rank = memRefType.getRank(); - auto originalSizes = memRefType.getShape(); - // Compute permuted sizes. - SmallVector sizes(rank, 0); - for (auto en : llvm::enumerate(permutationMap.getResults())) - sizes[en.index()] = - originalSizes[en.value().cast().getPosition()]; - - // Compute permuted strides. - int64_t offset; - SmallVector strides; - auto res = getStridesAndOffset(memRefType, strides, offset); - assert(succeeded(res) && strides.size() == static_cast(rank)); - (void)res; - auto map = - makeStridedLinearLayoutMap(strides, offset, memRefType.getContext()); - map = permutationMap ? map.compose(permutationMap) : map; - return MemRefType::Builder(memRefType).setShape(sizes).setAffineMaps(map); -} - -void mlir::linalg::TransposeOp::build(OpBuilder &b, OperationState &result, - Value view, AffineMapAttr permutation, - ArrayRef attrs) { - auto permutationMap = permutation.getValue(); - assert(permutationMap); - - auto memRefType = view.getType().cast(); - // Compute result type. 
- MemRefType resultType = inferTransposeResultType(memRefType, permutationMap); - - build(b, result, resultType, view, attrs); - result.addAttribute(TransposeOp::getPermutationAttrName(), permutation); -} - -static void print(OpAsmPrinter &p, TransposeOp op) { - p << op.getOperationName() << " " << op.view() << " " << op.permutation(); - p.printOptionalAttrDict(op.getAttrs(), - {TransposeOp::getPermutationAttrName()}); - p << " : " << op.view().getType() << " to " << op.getType(); -} - -static ParseResult parseTransposeOp(OpAsmParser &parser, - OperationState &result) { - OpAsmParser::OperandType view; - AffineMap permutation; - MemRefType srcType, dstType; - if (parser.parseOperand(view) || parser.parseAffineMap(permutation) || - parser.parseOptionalAttrDict(result.attributes) || - parser.parseColonType(srcType) || - parser.resolveOperand(view, srcType, result.operands) || - parser.parseKeywordType("to", dstType) || - parser.addTypeToList(dstType, result.types)) - return failure(); - - result.addAttribute(TransposeOp::getPermutationAttrName(), - AffineMapAttr::get(permutation)); - return success(); -} - -static LogicalResult verify(TransposeOp op) { - if (!op.permutation().isPermutation()) - return op.emitOpError("expected a permutation map"); - if (op.permutation().getNumDims() != op.getShapedType().getRank()) - return op.emitOpError( - "expected a permutation map of same rank as the view"); - - auto srcType = op.view().getType().cast(); - auto dstType = op.getType().cast(); - if (dstType != inferTransposeResultType(srcType, op.permutation())) - return op.emitOpError("output type ") - << dstType << " does not match transposed input type " << srcType; - return success(); -} - //===----------------------------------------------------------------------===// // YieldOp //===----------------------------------------------------------------------===// @@ -1359,11 +1279,6 @@ OpFoldResult SliceOp::fold(ArrayRef) { OpFoldResult TensorReshapeOp::fold(ArrayRef operands) { return foldReshapeOp(*this, operands); } -OpFoldResult TransposeOp::fold(ArrayRef) { - if (succeeded(foldMemRefCast(*this))) - return getResult(); - return {}; -} //===----------------------------------------------------------------------===// // Auto-generated Linalg named ops. diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 09600963be0ec..a4d739135aea3 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -3491,6 +3491,96 @@ static Type getTensorTypeFromMemRefType(Type type) { return NoneType::get(type.getContext()); } +//===----------------------------------------------------------------------===// +// TransposeOp +//===----------------------------------------------------------------------===// + +/// Build a strided memref type by applying `permutationMap` tp `memRefType`. +static MemRefType inferTransposeResultType(MemRefType memRefType, + AffineMap permutationMap) { + auto rank = memRefType.getRank(); + auto originalSizes = memRefType.getShape(); + // Compute permuted sizes. + SmallVector sizes(rank, 0); + for (auto en : llvm::enumerate(permutationMap.getResults())) + sizes[en.index()] = + originalSizes[en.value().cast().getPosition()]; + + // Compute permuted strides. 
+ int64_t offset; + SmallVector strides; + auto res = getStridesAndOffset(memRefType, strides, offset); + assert(succeeded(res) && strides.size() == static_cast(rank)); + (void)res; + auto map = + makeStridedLinearLayoutMap(strides, offset, memRefType.getContext()); + map = permutationMap ? map.compose(permutationMap) : map; + return MemRefType::Builder(memRefType).setShape(sizes).setAffineMaps(map); +} + +void TransposeOp::build(OpBuilder &b, OperationState &result, Value in, + AffineMapAttr permutation, + ArrayRef attrs) { + auto permutationMap = permutation.getValue(); + assert(permutationMap); + + auto memRefType = in.getType().cast(); + // Compute result type. + MemRefType resultType = inferTransposeResultType(memRefType, permutationMap); + + build(b, result, resultType, in, attrs); + result.addAttribute(TransposeOp::getPermutationAttrName(), permutation); +} + +// transpose $in $permutation attr-dict : type($in) `to` type(results) +static void print(OpAsmPrinter &p, TransposeOp op) { + p << "transpose " << op.in() << " " << op.permutation(); + p.printOptionalAttrDict(op.getAttrs(), + {TransposeOp::getPermutationAttrName()}); + p << " : " << op.in().getType() << " to " << op.getType(); +} + +static ParseResult parseTransposeOp(OpAsmParser &parser, + OperationState &result) { + OpAsmParser::OperandType in; + AffineMap permutation; + MemRefType srcType, dstType; + if (parser.parseOperand(in) || parser.parseAffineMap(permutation) || + parser.parseOptionalAttrDict(result.attributes) || + parser.parseColonType(srcType) || + parser.resolveOperand(in, srcType, result.operands) || + parser.parseKeywordType("to", dstType) || + parser.addTypeToList(dstType, result.types)) + return failure(); + + result.addAttribute(TransposeOp::getPermutationAttrName(), + AffineMapAttr::get(permutation)); + return success(); +} + +static LogicalResult verify(TransposeOp op) { + if (!op.permutation().isPermutation()) + return op.emitOpError("expected a permutation map"); + if (op.permutation().getNumDims() != op.getShapedType().getRank()) + return op.emitOpError( + "expected a permutation map of same rank as the input"); + + auto srcType = op.in().getType().cast(); + auto dstType = op.getType().cast(); + auto transposedType = inferTransposeResultType(srcType, op.permutation()); + if (dstType != transposedType) + return op.emitOpError("output type ") + << dstType << " does not match transposed input type " << srcType + << ", " << transposedType; + return success(); +} + +OpFoldResult TransposeOp::fold(ArrayRef) { + if (succeeded(foldMemRefCast(*this))) + return getResult(); + return {}; +} + //===----------------------------------------------------------------------===// // TruncateIOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 663595ce161c4..672ad4058309a 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -673,7 +673,7 @@ static LogicalResult foldExtractOpFromExtractChain(ExtractOp extractOp) { /// Fold the result of an ExtractOp in place when it comes from a TransposeOp. 
static LogicalResult foldExtractOpFromTranspose(ExtractOp extractOp) { - auto transposeOp = extractOp.vector().getDefiningOp(); + auto transposeOp = extractOp.vector().getDefiningOp(); if (!transposeOp) return failure(); @@ -2521,7 +2521,7 @@ void vector::TransposeOp::build(OpBuilder &builder, OperationState &result, // Eliminates transpose operations, which produce values identical to their // input values. This happens when the dimensions of the input vector remain in // their original order after the transpose operation. -OpFoldResult TransposeOp::fold(ArrayRef operands) { +OpFoldResult vector::TransposeOp::fold(ArrayRef operands) { SmallVector transp; getTransp(transp); @@ -2535,7 +2535,7 @@ OpFoldResult TransposeOp::fold(ArrayRef operands) { return vector(); } -static LogicalResult verify(TransposeOp op) { +static LogicalResult verify(vector::TransposeOp op) { VectorType vectorType = op.getVectorType(); VectorType resultType = op.getResultType(); int64_t rank = resultType.getRank(); @@ -2563,14 +2563,14 @@ static LogicalResult verify(TransposeOp op) { namespace { // Rewrites two back-to-back TransposeOp operations into a single TransposeOp. -class TransposeFolder final : public OpRewritePattern { +class TransposeFolder final : public OpRewritePattern { public: - using OpRewritePattern::OpRewritePattern; + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(TransposeOp transposeOp, + LogicalResult matchAndRewrite(vector::TransposeOp transposeOp, PatternRewriter &rewriter) const override { - // Wrapper around TransposeOp::getTransp() for cleaner code. - auto getPermutation = [](TransposeOp transpose) { + // Wrapper around vector::TransposeOp::getTransp() for cleaner code. + auto getPermutation = [](vector::TransposeOp transpose) { SmallVector permutation; transpose.getTransp(permutation); return permutation; @@ -2586,15 +2586,15 @@ class TransposeFolder final : public OpRewritePattern { }; // Return if the input of 'transposeOp' is not defined by another transpose. - TransposeOp parentTransposeOp = - transposeOp.vector().getDefiningOp(); + vector::TransposeOp parentTransposeOp = + transposeOp.vector().getDefiningOp(); if (!parentTransposeOp) return failure(); SmallVector permutation = composePermutations( getPermutation(parentTransposeOp), getPermutation(transposeOp)); // Replace 'transposeOp' with a new transpose operation. 
- rewriter.replaceOpWithNewOp( + rewriter.replaceOpWithNewOp( transposeOp, transposeOp.getResult().getType(), parentTransposeOp.vector(), vector::getVectorSubscriptAttr(rewriter, permutation)); @@ -2604,12 +2604,12 @@ class TransposeFolder final : public OpRewritePattern { } // end anonymous namespace -void TransposeOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { +void vector::TransposeOp::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { results.insert(context); } -void TransposeOp::getTransp(SmallVectorImpl &results) { +void vector::TransposeOp::getTransp(SmallVectorImpl &results) { populateFromInt64AttrArray(transp(), results); } diff --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir index c7363085817e1..71a35f6ccf0a2 100644 --- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir +++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir @@ -114,3 +114,20 @@ func @assert_test_function(%arg : i1) { return } +// ----- + +// CHECK-LABEL: func @transpose +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.extractvalue {{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.extractvalue {{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.extractvalue {{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +func @transpose(%arg0: memref) { + %0 = transpose %arg0 (i, j, k) -> (k, i, j) : memref to memref (d2 * s1 + s0 + d0 * s2 + d1)>> + return +} diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 004bf9260a821..dcfafdc4d27a4 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -33,27 +33,6 @@ func @store_number_of_indices(%v : memref) { // ----- -func @transpose_not_permutation(%v : memref(off + M * i + j)>>) { - // expected-error @+1 {{expected a permutation map}} - linalg.transpose %v (i, j) -> (i, i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> -} - -// ----- - -func @transpose_bad_rank(%v : memref(off + M * i + j)>>) { - // expected-error @+1 {{expected a permutation map of same rank as the view}} - linalg.transpose %v (i) -> (i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> -} - -// ----- - -func @transpose_wrong_type(%v : memref(off + M * i + j)>>) { - // expected-error @+1 {{output type 'memref (d0 * s1 + s0 + d1)>>' does not match transposed input type 'memref (d0 * s1 + s0 + d1)>>'}} - linalg.transpose %v (i, j) -> (j, i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> -} - -// ----- - func @yield_parent(%arg0: memref(off + i)>>) { // expected-error @+1 {{op expected parent op with LinalgOp interface}} linalg.yield %arg0: memref(off 
+ i)>> diff --git a/mlir/test/Dialect/Linalg/llvm.mlir b/mlir/test/Dialect/Linalg/llvm.mlir index c8031824d6307..9303a7aa6b312 100644 --- a/mlir/test/Dialect/Linalg/llvm.mlir +++ b/mlir/test/Dialect/Linalg/llvm.mlir @@ -69,22 +69,6 @@ func @slice_with_range_and_index(%arg0: memref, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK: llvm.insertvalue %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -func @transpose(%arg0: memref) { - %0 = linalg.transpose %arg0 (i, j, k) -> (k, i, j) : memref to memref (d2 * s1 + s0 + d0 * s2 + d1)>> - return -} -// CHECK-LABEL: func @transpose -// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: llvm.insertvalue {{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: llvm.insertvalue {{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: llvm.extractvalue {{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: llvm.insertvalue {{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: llvm.extractvalue {{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: llvm.insertvalue {{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: llvm.extractvalue {{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: llvm.insertvalue {{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> - func @reshape_static_expand(%arg0: memref<3x4x5xf32>) -> memref<1x3x4x1x5xf32> { // Reshapes that expand a contiguous tensor with some 1's. 
 %0 = linalg.reshape %arg0 [affine_map<(i, j, k, l, m) -> (i, j)>,
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index 5960d5525f444..868cabb5eff35 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -126,11 +126,11 @@ func @fill_view(%arg0: memref, %arg1: f32) {
 // CHECK-DAG: #[[$strided3DT:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2 * s1 + s0 + d1 * s2 + d0)>
 
 func @transpose(%arg0: memref) {
-  %0 = linalg.transpose %arg0 (i, j, k) -> (k, j, i) : memref to memref (d2 * s1 + s0 + d1 * s2 + d0)>>
+  %0 = transpose %arg0 (i, j, k) -> (k, j, i) : memref to memref (d2 * s1 + s0 + d1 * s2 + d0)>>
   return
 }
 // CHECK-LABEL: func @transpose
 // CHECK: transpose %{{.*}} ([[i:.*]], [[j:.*]], [[k:.*]]) -> ([[k]], [[j]], [[i]]) :
 // CHECK-SAME: memref to memref
 
// -----
diff --git a/mlir/test/Dialect/Linalg/standard.mlir b/mlir/test/Dialect/Linalg/standard.mlir
index 14b4e2a01c301..eee2ca1d1a1cb 100644
--- a/mlir/test/Dialect/Linalg/standard.mlir
+++ b/mlir/test/Dialect/Linalg/standard.mlir
@@ -55,9 +55,9 @@ func @copy_transpose(%arg0: memref, %a
 // CHECK-LABEL: func @copy_transpose(
 // CHECK-SAME: %[[arg0:[a-zA-z0-9]*]]: memref,
 // CHECK-SAME: %[[arg1:[a-zA-z0-9]*]]: memref) {
-// CHECK:   %[[t0:.*]] = linalg.transpose %[[arg0]]
+// CHECK:   %[[t0:.*]] = transpose %[[arg0]]
 // CHECK-SAME: (d0, d1, d2) -> (d0, d2, d1) : memref
-// CHECK:   %[[t1:.*]] = linalg.transpose %[[arg1]]
+// CHECK:   %[[t1:.*]] = transpose %[[arg1]]
 // CHECK-SAME: (d0, d1, d2) -> (d2, d1, d0) : memref
 // CHECK:   %[[o0:.*]] = memref_cast %[[t0]] :
 // CHECK-SAME: memref to memref
diff --git a/mlir/test/Dialect/Standard/invalid.mlir b/mlir/test/Dialect/Standard/invalid.mlir
index 7f9c564e74f3f..72fe5c227578a 100644
--- a/mlir/test/Dialect/Standard/invalid.mlir
+++ b/mlir/test/Dialect/Standard/invalid.mlir
@@ -81,3 +81,24 @@ func @dynamic_tensor_from_elements(%m : index, %n : index)
   } : tensor<?x3xindex>
   return %tnsr : tensor<?x3xindex>
 }
+
+// -----
+
+func @transpose_not_permutation(%v : memref<?x?xf32, affine_map<(i, j)[off, M]->(off + M * i + j)>>) {
+  // expected-error @+1 {{expected a permutation map}}
+  transpose %v (i, j) -> (i, i) : memref<?x?xf32, affine_map<(i, j)[off, M]->(off + M * i + j)>> to memref<?x?xf32, affine_map<(i, j)[off, M]->(off + M * i + j)>>
+}
+
+// -----
+
+func @transpose_bad_rank(%v : memref<?x?xf32, affine_map<(i, j)[off, M]->(off + M * i + j)>>) {
+  // expected-error @+1 {{expected a permutation map of same rank as the input}}
+  transpose %v (i) -> (i) : memref<?x?xf32, affine_map<(i, j)[off, M]->(off + M * i + j)>> to memref<?x?xf32, affine_map<(i, j)[off, M]->(off + M * i + j)>>
+}
+
+// -----
+
+func @transpose_wrong_type(%v : memref<?x?xf32, affine_map<(i, j)[off, M]->(off + M * i + j)>>) {
+  // expected-error @+1 {{output type 'memref<?x?xf32, affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>>' does not match transposed input type 'memref<?x?xf32, affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>>'}}
+  transpose %v (i, j) -> (j, i) : memref<?x?xf32, affine_map<(i, j)[off, M]->(off + M * i + j)>> to memref<?x?xf32, affine_map<(i, j)[off, M]->(off + M * i + j)>>
+}

From 5b5e78a43124c0ced813f378195b36098f716c8f Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa"
Date: Mon, 21 Sep 2020 17:17:29 +0900
Subject: [PATCH 012/321] [VE] Support register and frame-index pair correctly

Support register and frame-index pair correctly as operands of generic
load/store instructions, e.g. LD1BZXrri, STLrri, etc. Also add regression
tests.
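A toy model of the canonicalization in the VEISelDAGToDAG.cpp hunk below
(invented types, not SelectionDAG's API): the frame index has to land in the
base operand, because frame-index elimination rewrites the base into the
frame pointer plus the slot's static offset, and only the base/offset pair of
the rri addressing mode can absorb that rewrite.

#include <cstdio>
#include <string>

// Toy rri addressing mode: base + index + immediate offset.
struct AddrRRI { std::string base; std::string index; int offset; };

// Frame-index elimination: replace an FI base with the frame pointer and
// fold the slot's offset into the immediate.
static AddrRRI eliminateFrameIndex(AddrRRI a, int slotOffset) {
  a.base = "%fp";
  a.offset += slotOffset;
  return a;
}

int main() {
  // After the swap below, the FI sits in the base and the register in the
  // index, so elimination can rewrite it in place.
  AddrRRI a{"FI#0", "%s1", 0};
  a = eliminateFrameIndex(a, 184);
  std::printf("%d(%s, %s)\n", a.offset, a.index.c_str(), a.base.c_str());
  // prints: 184(%s1, %fp)
}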
Differential Revision: https://reviews.llvm.org/D88779
---
 llvm/lib/Target/VE/VEISelDAGToDAG.cpp |  8 ++++
 llvm/test/CodeGen/VE/loadrri.ll       | 64 +++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 llvm/test/CodeGen/VE/loadrri.ll

diff --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
index f3d067d55fdb6..13759646fe911 100644
--- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -183,6 +183,14 @@ bool VEDAGToDAGISel::selectADDRrri(SDValue Addr, SDValue &Base, SDValue &Index,
     return false;
   }
   if (matchADDRrr(Addr, LHS, RHS)) {
+    // If the input is a pair of a frame-index and a register, move a
+    // frame-index to LHS.  This generates MI with following operands.
+    //    %dest, #FI, %reg, offset
+    // In the eliminateFrameIndex, above MI is converted to the following.
+    //    %dest, %fp, %reg, fi_offset + offset
+    if (dyn_cast<FrameIndexSDNode>(RHS))
+      std::swap(LHS, RHS);
+
     if (matchADDRri(RHS, Index, Offset)) {
       Base = LHS;
       return true;
diff --git a/llvm/test/CodeGen/VE/loadrri.ll b/llvm/test/CodeGen/VE/loadrri.ll
new file mode 100644
index 0000000000000..fafd695a21200
--- /dev/null
+++ b/llvm/test/CodeGen/VE/loadrri.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+%struct.data = type { [4 x i8] }
+
+;;; Check basic usage of rri format load instructions.
+;;; Our target is DAG selection mechanism for LD1BSXrri.
+;;; We prepared following three styles.
+;;;   1. LD1BSXrri with %reg1 + %reg2
+;;;   2. LD1BSXrri with %frame-index + %reg
+;;;   3. LD1BSXrri with %reg + %frame-index
+
+; Function Attrs: norecurse nounwind readonly
+define signext i8 @func_rr(%struct.data* nocapture readonly %0, i32 signext %1) {
+; CHECK-LABEL: func_rr:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    sll %s1, %s1, 2
+; CHECK-NEXT:    ld1b.sx %s0, (%s1, %s0)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = sext i32 %1 to i64
+  %4 = getelementptr inbounds %struct.data, %struct.data* %0, i64 %3, i32 0, i64 0
+  %5 = load i8, i8* %4, align 1
+  ret i8 %5
+}
+
+; Function Attrs: nounwind
+define signext i8 @func_fr(%struct.data* readonly %0, i32 signext %1) {
+; CHECK-LABEL: func_fr:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    sll %s1, %s1, 2
+; CHECK-NEXT:    ldl.sx %s0, (%s1, %s0)
+; CHECK-NEXT:    stl %s0, 184(%s1, %s11)
+; CHECK-NEXT:    ld1b.sx %s0, 184(%s1, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca [10 x %struct.data], align 1
+  %4 = getelementptr inbounds [10 x %struct.data], [10 x %struct.data]* %3, i64 0, i64 0, i32 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 40, i8* nonnull %4)
+  %5 = sext i32 %1 to i64
+  %6 = getelementptr inbounds [10 x %struct.data], [10 x %struct.data]* %3, i64 0, i64 %5, i32 0, i64 0
+  %7 = getelementptr inbounds %struct.data, %struct.data* %0, i64 %5, i32 0, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %6, i8* align 1 %7, i64 4, i1 true)
+  %8 = load volatile i8, i8* %6, align 1
+  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %4)
+  ret i8 %8
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+%"basic_string" = type { %union.anon.3, [23 x i8] }
+%union.anon.3 = type { i8 }
+
+define signext i8 @func_rf(i8* readonly %0, i64 %1, i32 signext %2) {
+; CHECK-LABEL: func_rf:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    ld1b.sx %s0, 184(%s1, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %buf = alloca %"basic_string", align 8
%"basic_string", align 8 + + %sub631 = add nsw i64 %1, -1 + %add.ptr.i = getelementptr inbounds %"basic_string", %"basic_string"* %buf, i64 0, i32 1, i64 %sub631 + %ret = load i8, i8* %add.ptr.i, align 1 + ret i8 %ret +} From d52211e384773ae06aabf476c78f16d2976660b0 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 5 Oct 2020 05:44:27 -0400 Subject: [PATCH 013/321] [mlir] Fix SubViewOp doc in .td --- mlir/include/mlir/Dialect/StandardOps/IR/Ops.td | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 4a014cb7060c3..69c979ae9e387 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -3028,10 +3028,11 @@ def SubViewOp : BaseOpWithOffsetSizesAndStrides< // memref is "inbounds" w.r.t to base memref. It is upto the client // to ensure that the subview is accessed in a manner that is // in-bounds. + ``` Example 5: - ``` + ```mlir // Rank-reducing subview. %1 = subview %0[0, 0, 0][1, 16, 4][1, 1, 1] : memref<8x16x4xf32> to memref<16x4xf32> From 6a089ce0e40abbe4e0f26f05540e3caa60d98a29 Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Wed, 30 Sep 2020 14:15:24 +0200 Subject: [PATCH 014/321] [AMDGPU] Use tablegen for argument indices Use tablegen generic tables to get the index of image intrinsic arguments. Before, the computation of which image intrinsic argument is at which index was scattered in a few places, tablegen, the SDag instruction selection and GlobalISel. This patch changes that, so only tablegen contains code to compute indices and the ImageDimIntrinsicInfo table provides these information. Differential Revision: https://reviews.llvm.org/D86270 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 14 ++- .../lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h | 31 ----- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h | 27 ++++- .../AMDGPU/AMDGPUInstructionSelector.cpp | 51 ++++---- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 111 +++++++++--------- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 21 +++- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 83 ++++++------- llvm/lib/Target/AMDGPU/SIISelLowering.h | 2 +- 8 files changed, 162 insertions(+), 178 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 918ab3efc0ad5..ed96c0896d742 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -688,11 +688,15 @@ class AMDGPUImageDimIntrinsicEval { int NumRSrcArgs = 1; int NumSampArgs = !if(P_.IsSample, 2, 0); int DmaskArgIndex = NumDataArgs; - int VAddrArgIndex = !add(NumDataArgs, NumDmaskArgs); - int GradientArgIndex = !add(NumDataArgs, NumDmaskArgs, NumExtraAddrArgs); - int CoordArgIndex = !add(NumDataArgs, NumDmaskArgs, NumExtraAddrArgs, NumGradientArgs); - int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1); - int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs); + int VAddrArgIndex = !add(DmaskArgIndex, NumDmaskArgs); + int GradientArgIndex = !add(VAddrArgIndex, NumExtraAddrArgs); + int CoordArgIndex = !add(GradientArgIndex, NumGradientArgs); + int LodArgIndex = !add(VAddrArgIndex, NumVAddrArgs, -1); + int MipArgIndex = LodArgIndex; + int RsrcArgIndex = !add(VAddrArgIndex, NumVAddrArgs); + int SampArgIndex = !add(RsrcArgIndex, NumRSrcArgs); + int UnormArgIndex = !add(SampArgIndex, 1); + int TexFailCtrlArgIndex = !add(SampArgIndex, 
NumSampArgs); int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h index 766750758efc2..87bb88cec502c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h @@ -9,7 +9,6 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H -#include "AMDGPUInstrInfo.h" #include "llvm/CodeGen/Register.h" #include @@ -26,36 +25,6 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg); bool isLegalVOP3PShuffleMask(ArrayRef Mask); -/// Return number of address arguments, and the number of gradients for an image -/// intrinsic. -inline std::pair -getImageNumVAddr(const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, - const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode) { - const AMDGPU::MIMGDimInfo *DimInfo - = AMDGPU::getMIMGDimInfo(ImageDimIntr->Dim); - - int NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0; - int NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0; - int NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0; - int NumVAddr = BaseOpcode->NumExtraArgs + NumGradients + NumCoords + NumLCM; - return {NumVAddr, NumGradients}; -} - -/// Return index of dmask in an gMIR image intrinsic -inline int getDMaskIdx(const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode, - int NumDefs) { - assert(!BaseOpcode->Atomic); - return NumDefs + 1 + (BaseOpcode->Store ? 1 : 0); -} - -/// Return first address operand index in a gMIR image intrinsic. -inline int getImageVAddrIdxBegin(const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode, - int NumDefs) { - if (BaseOpcode->Atomic) - return NumDefs + 1 + (BaseOpcode->AtomicX2 ? 2 : 1); - return getDMaskIdx(BaseOpcode, NumDefs) + 1; -} - } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index a94737b1d3d5d..304dcb5d47f13 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -52,11 +52,28 @@ struct ImageDimIntrinsicInfo { unsigned Intr; unsigned BaseOpcode; MIMGDim Dim; - unsigned GradientStart; - unsigned CoordStart; - unsigned VAddrEnd; - unsigned GradientTyArg; - unsigned CoordTyArg; + + uint8_t NumGradients; + uint8_t NumDmask; + uint8_t NumData; + uint8_t NumVAddrs; + uint8_t NumArgs; + + uint8_t DMaskIndex; + uint8_t VAddrStart; + uint8_t GradientStart; + uint8_t CoordStart; + uint8_t LodIndex; + uint8_t MipIndex; + uint8_t VAddrEnd; + uint8_t RsrcIndex; + uint8_t SampIndex; + uint8_t UnormIndex; + uint8_t TexFailCtrlIndex; + uint8_t CachePolicyIndex; + + uint8_t GradientTyArg; + uint8_t CoordTyArg; }; const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 58d30dc9ddf6a..341e28b760af4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1485,34 +1485,27 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( unsigned IntrOpcode = Intr->BaseOpcode; const bool IsGFX10 = STI.getGeneration() >= AMDGPUSubtarget::GFX10; - const int VAddrIdx = getImageVAddrIdxBegin(BaseOpcode, - MI.getNumExplicitDefs()); - int NumVAddr, NumGradients; - std::tie(NumVAddr, NumGradients) = getImageNumVAddr(Intr, BaseOpcode); + const unsigned ArgOffset = MI.getNumExplicitDefs() + 1; Register VDataIn, VDataOut; LLT VDataTy; int NumVDataDwords = -1; 
bool IsD16 = false; - // XXX - Can we just get the second to last argument for ctrl? - unsigned CtrlIdx; // Index of texfailctrl argument bool Unorm; - if (!BaseOpcode->Sampler) { + if (!BaseOpcode->Sampler) Unorm = true; - CtrlIdx = VAddrIdx + NumVAddr + 1; - } else { - Unorm = MI.getOperand(VAddrIdx + NumVAddr + 2).getImm() != 0; - CtrlIdx = VAddrIdx + NumVAddr + 3; - } + else + Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0; bool TFE; bool LWE; bool IsTexFail = false; - if (!parseTexFail(MI.getOperand(CtrlIdx).getImm(), TFE, LWE, IsTexFail)) + if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(), + TFE, LWE, IsTexFail)) return false; - const int Flags = MI.getOperand(CtrlIdx + 2).getImm(); + const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm(); const bool IsA16 = (Flags & 1) != 0; const bool IsG16 = (Flags & 2) != 0; @@ -1543,9 +1536,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( NumVDataDwords = Is64Bit ? 2 : 1; } } else { - const int DMaskIdx = 2; // Input/output + intrinsic ID. - - DMask = MI.getOperand(DMaskIdx).getImm(); + DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm(); DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask); if (BaseOpcode->Store) { @@ -1576,7 +1567,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( if (LZMappingInfo) { // The legalizer replaced the register with an immediate 0 if we need to // change the opcode. - const MachineOperand &Lod = MI.getOperand(VAddrIdx + NumVAddr - 1); + const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->LodIndex); if (Lod.isImm()) { assert(Lod.getImm() == 0); IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l @@ -1585,7 +1576,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( // Optimize _mip away, when 'lod' is zero if (MIPMappingInfo) { - const MachineOperand &Lod = MI.getOperand(VAddrIdx + NumVAddr - 1); + const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->MipIndex); if (Lod.isImm()) { assert(Lod.getImm() == 0); IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip @@ -1608,20 +1599,22 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( bool DLC = false; if (BaseOpcode->Atomic) { GLC = true; // TODO no-return optimization - if (!parseCachePolicy(MI.getOperand(CtrlIdx + 1).getImm(), nullptr, &SLC, - IsGFX10 ? &DLC : nullptr)) + if (!parseCachePolicy( + MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), nullptr, + &SLC, IsGFX10 ? &DLC : nullptr)) return false; } else { - if (!parseCachePolicy(MI.getOperand(CtrlIdx + 1).getImm(), &GLC, &SLC, - IsGFX10 ? &DLC : nullptr)) + if (!parseCachePolicy( + MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), &GLC, + &SLC, IsGFX10 ? &DLC : nullptr)) return false; } int NumVAddrRegs = 0; int NumVAddrDwords = 0; - for (int I = 0; I < NumVAddr; ++I) { + for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) { // Skip the $noregs and 0s inserted during legalization. - MachineOperand &AddrOp = MI.getOperand(VAddrIdx + I); + MachineOperand &AddrOp = MI.getOperand(ArgOffset + I); if (!AddrOp.isReg()) continue; // XXX - Break? 
@@ -1684,17 +1677,17 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( if (VDataIn) MIB.addReg(VDataIn); // vdata input - for (int i = 0; i != NumVAddrRegs; ++i) { - MachineOperand &SrcOp = MI.getOperand(VAddrIdx + i); + for (int I = 0; I != NumVAddrRegs; ++I) { + MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I); if (SrcOp.isReg()) { assert(SrcOp.getReg() != 0); MIB.addReg(SrcOp.getReg()); } } - MIB.addReg(MI.getOperand(VAddrIdx + NumVAddr).getReg()); // rsrc + MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg()); if (BaseOpcode->Sampler) - MIB.addReg(MI.getOperand(VAddrIdx + NumVAddr + 1).getReg()); // sampler + MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg()); MIB.addImm(DMask); // dmask diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 0f1eb03f0c27d..fa3130ab3fe01 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3920,38 +3920,39 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI, /// Turn a set of s16 typed registers in \p A16AddrRegs into a dword sized /// vector with s16 typed elements. -static void packImageA16AddressToDwords(MachineIRBuilder &B, MachineInstr &MI, - SmallVectorImpl &PackedAddrs, - int AddrIdx, int DimIdx, int EndIdx, - int NumGradients) { +static void packImageA16AddressToDwords( + MachineIRBuilder &B, MachineInstr &MI, + SmallVectorImpl &PackedAddrs, unsigned ArgOffset, + const AMDGPU::ImageDimIntrinsicInfo *Intr, unsigned EndIdx) { const LLT S16 = LLT::scalar(16); const LLT V2S16 = LLT::vector(2, 16); - for (int I = AddrIdx; I < EndIdx; ++I) { - MachineOperand &SrcOp = MI.getOperand(I); + for (unsigned I = Intr->VAddrStart; I < EndIdx; I++) { + MachineOperand &SrcOp = MI.getOperand(ArgOffset + I); if (!SrcOp.isReg()) continue; // _L to _LZ may have eliminated this. Register AddrReg = SrcOp.getReg(); - if (I < DimIdx) { + if (I < Intr->GradientStart) { AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0); PackedAddrs.push_back(AddrReg); } else { // Dz/dh, dz/dv and the last odd coord are packed with undef. Also, in 1D, // derivatives dx/dh and dx/dv are packed with undef. if (((I + 1) >= EndIdx) || - ((NumGradients / 2) % 2 == 1 && - (I == DimIdx + (NumGradients / 2) - 1 || - I == DimIdx + NumGradients - 1)) || + ((Intr->NumGradients / 2) % 2 == 1 && + (I == Intr->GradientStart + (Intr->NumGradients / 2) - 1 || + I == Intr->GradientStart + Intr->NumGradients - 1)) || // Check for _L to _LZ optimization - !MI.getOperand(I + 1).isReg()) { + !MI.getOperand(ArgOffset + I + 1).isReg()) { PackedAddrs.push_back( B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)}) .getReg(0)); } else { PackedAddrs.push_back( - B.buildBuildVector(V2S16, {AddrReg, MI.getOperand(I + 1).getReg()}) + B.buildBuildVector( + V2S16, {AddrReg, MI.getOperand(ArgOffset + I + 1).getReg()}) .getReg(0)); ++I; } @@ -4010,43 +4011,37 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI, /// the intrinsic's arguments. In cases like a16 addreses, this requires padding /// now unnecessary arguments with $noreg. 
bool AMDGPULegalizerInfo::legalizeImageIntrinsic( - MachineInstr &MI, MachineIRBuilder &B, - GISelChangeObserver &Observer, - const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const { + MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer, + const AMDGPU::ImageDimIntrinsicInfo *Intr) const { - const int NumDefs = MI.getNumExplicitDefs(); + const unsigned NumDefs = MI.getNumExplicitDefs(); + const unsigned ArgOffset = NumDefs + 1; bool IsTFE = NumDefs == 2; // We are only processing the operands of d16 image operations on subtargets // that use the unpacked register layout, or need to repack the TFE result. // TODO: Do we need to guard against already legalized intrinsics? const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = - AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode); + AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); MachineRegisterInfo *MRI = B.getMRI(); const LLT S32 = LLT::scalar(32); const LLT S16 = LLT::scalar(16); const LLT V2S16 = LLT::vector(2, 16); - // Index of first address argument - const int AddrIdx = getImageVAddrIdxBegin(BaseOpcode, NumDefs); - - int NumVAddrs, NumGradients; - std::tie(NumVAddrs, NumGradients) = getImageNumVAddr(ImageDimIntr, BaseOpcode); - const int DMaskIdx = BaseOpcode->Atomic ? -1 : - getDMaskIdx(BaseOpcode, NumDefs); unsigned DMask = 0; // Check for 16 bit addresses and pack if true. - int DimIdx = AddrIdx + BaseOpcode->NumExtraArgs; - LLT GradTy = MRI->getType(MI.getOperand(DimIdx).getReg()); - LLT AddrTy = MRI->getType(MI.getOperand(DimIdx + NumGradients).getReg()); + LLT GradTy = + MRI->getType(MI.getOperand(ArgOffset + Intr->GradientStart).getReg()); + LLT AddrTy = + MRI->getType(MI.getOperand(ArgOffset + Intr->CoordStart).getReg()); const bool IsG16 = GradTy == S16; const bool IsA16 = AddrTy == S16; int DMaskLanes = 0; if (!BaseOpcode->Atomic) { - DMask = MI.getOperand(DMaskIdx).getImm(); + DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm(); if (BaseOpcode->Gather4) { DMaskLanes = 4; } else if (DMask != 0) { @@ -4073,7 +4068,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( if (IsTFE && DMask == 0) { DMask = 0x1; DMaskLanes = 1; - MI.getOperand(DMaskIdx).setImm(DMask); + MI.getOperand(ArgOffset + Intr->DMaskIndex).setImm(DMask); } if (BaseOpcode->Atomic) { @@ -4094,41 +4089,41 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( } } - int CorrectedNumVAddrs = NumVAddrs; + unsigned CorrectedNumVAddrs = Intr->NumVAddrs; // Optimize _L to _LZ when _L is zero if (const AMDGPU::MIMGLZMappingInfo *LZMappingInfo = - AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) { + AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode)) { const ConstantFP *ConstantLod; - const int LodIdx = AddrIdx + NumVAddrs - 1; - if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_GFCst(ConstantLod))) { + if (mi_match(MI.getOperand(ArgOffset + Intr->LodIndex).getReg(), *MRI, + m_GFCst(ConstantLod))) { if (ConstantLod->isZero() || ConstantLod->isNegative()) { // Set new opcode to _lz variant of _l, and change the intrinsic ID. - ImageDimIntr = AMDGPU::getImageDimInstrinsicByBaseOpcode( - LZMappingInfo->LZ, ImageDimIntr->Dim); + const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = + AMDGPU::getImageDimInstrinsicByBaseOpcode(LZMappingInfo->LZ, + Intr->Dim); // The starting indexes should remain in the same place. 
- --NumVAddrs; --CorrectedNumVAddrs; - MI.getOperand(MI.getNumExplicitDefs()).setIntrinsicID( - static_cast(ImageDimIntr->Intr)); - MI.RemoveOperand(LodIdx); + MI.getOperand(MI.getNumExplicitDefs()) + .setIntrinsicID(static_cast(NewImageDimIntr->Intr)); + MI.RemoveOperand(ArgOffset + Intr->LodIndex); + Intr = NewImageDimIntr; } } } // Optimize _mip away, when 'lod' is zero - if (AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) { + if (AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode)) { int64_t ConstantLod; - const int LodIdx = AddrIdx + NumVAddrs - 1; - - if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_ICst(ConstantLod))) { + if (mi_match(MI.getOperand(ArgOffset + Intr->MipIndex).getReg(), *MRI, + m_ICst(ConstantLod))) { if (ConstantLod == 0) { // TODO: Change intrinsic opcode and remove operand instead or replacing // it with 0, as the _L to _LZ handling is done above. - MI.getOperand(LodIdx).ChangeToImmediate(0); + MI.getOperand(ArgOffset + Intr->MipIndex).ChangeToImmediate(0); --CorrectedNumVAddrs; } } @@ -4143,18 +4138,17 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( } else if (!ST.hasG16()) return false; - if (NumVAddrs > 1) { + if (Intr->NumVAddrs > 1) { SmallVector PackedRegs; // Don't compress addresses for G16 - const int PackEndIdx = - IsA16 ? (AddrIdx + NumVAddrs) : (DimIdx + NumGradients); - packImageA16AddressToDwords(B, MI, PackedRegs, AddrIdx, DimIdx, - PackEndIdx, NumGradients); + const int PackEndIdx = IsA16 ? Intr->VAddrEnd : Intr->CoordStart; + packImageA16AddressToDwords(B, MI, PackedRegs, ArgOffset, Intr, + PackEndIdx); if (!IsA16) { // Add uncompressed address - for (int I = DimIdx + NumGradients; I != AddrIdx + NumVAddrs; ++I) { - int AddrReg = MI.getOperand(I).getReg(); + for (unsigned I = Intr->CoordStart; I < Intr->VAddrEnd; I++) { + int AddrReg = MI.getOperand(ArgOffset + I).getReg(); assert(B.getMRI()->getType(AddrReg) == LLT::scalar(32)); PackedRegs.push_back(AddrReg); } @@ -4170,9 +4164,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( PackedRegs.resize(1); } - const int NumPacked = PackedRegs.size(); - for (int I = 0; I != NumVAddrs; ++I) { - MachineOperand &SrcOp = MI.getOperand(AddrIdx + I); + const unsigned NumPacked = PackedRegs.size(); + for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) { + MachineOperand &SrcOp = MI.getOperand(ArgOffset + I); if (!SrcOp.isReg()) { assert(SrcOp.isImm() && SrcOp.getImm() == 0); continue; @@ -4180,8 +4174,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( assert(SrcOp.getReg() != AMDGPU::NoRegister); - if (I < NumPacked) - SrcOp.setReg(PackedRegs[I]); + if (I - Intr->VAddrStart < NumPacked) + SrcOp.setReg(PackedRegs[I - Intr->VAddrStart]); else SrcOp.setReg(AMDGPU::NoRegister); } @@ -4200,8 +4194,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( // allocation when possible. 
const bool UseNSA = CorrectedNumVAddrs >= 3 && ST.hasNSAEncoding(); - if (!UseNSA && NumVAddrs > 1) - convertImageAddrToPacked(B, MI, AddrIdx, NumVAddrs); + if (!UseNSA && Intr->NumVAddrs > 1) + convertImageAddrToPacked(B, MI, ArgOffset + Intr->VAddrStart, + Intr->NumVAddrs); } int Flags = 0; diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index c223e1a8bc265..f56b8728e64c0 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -896,9 +896,25 @@ class ImageDimIntrinsicInfo { AMDGPUDimProps Dim = I.P.Dim; AMDGPUImageDimIntrinsicEval DimEval = AMDGPUImageDimIntrinsicEval; + bits<8> NumGradients = DimEval.NumGradientArgs; + bits<8> NumDmask = DimEval.NumDmaskArgs; + bits<8> NumData = DimEval.NumDataArgs; + bits<8> NumVAddrs = DimEval.NumVAddrArgs; + bits<8> NumArgs = !add(DimEval.CachePolicyArgIndex, 1); + + bits<8> DMaskIndex = DimEval.DmaskArgIndex; + bits<8> VAddrStart = DimEval.VAddrArgIndex; bits<8> GradientStart = DimEval.GradientArgIndex; bits<8> CoordStart = DimEval.CoordArgIndex; + bits<8> LodIndex = DimEval.LodArgIndex; + bits<8> MipIndex = DimEval.MipArgIndex; bits<8> VAddrEnd = !add(DimEval.VAddrArgIndex, DimEval.NumVAddrArgs); + bits<8> RsrcIndex = DimEval.RsrcArgIndex; + bits<8> SampIndex = DimEval.SampArgIndex; + bits<8> UnormIndex = DimEval.UnormArgIndex; + bits<8> TexFailCtrlIndex = DimEval.TexFailCtrlArgIndex; + bits<8> CachePolicyIndex = DimEval.CachePolicyArgIndex; + bits<8> GradientTyArg = !add(I.P.NumRetAndDataAnyTypes, !foldl(0, I.P.ExtraAddrArgs, cnt, arg, !add(cnt, arg.Type.isAny))); bits<8> CoordTyArg = !add(GradientTyArg, !if(I.P.Gradients, 1, 0)); @@ -906,7 +922,10 @@ class ImageDimIntrinsicInfo { def ImageDimIntrinsicTable : GenericTable { let FilterClass = "ImageDimIntrinsicInfo"; - let Fields = ["Intr", "BaseOpcode", "Dim", "GradientStart", "CoordStart", "VAddrEnd", "GradientTyArg", "CoordTyArg"]; + let Fields = ["Intr", "BaseOpcode", "Dim", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs", + "DMaskIndex", "VAddrStart", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd", + "RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex", + "GradientTyArg", "CoordTyArg"]; GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode; GenericEnum TypeOf_Dim = MIMGDim; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 73408346fbae9..1725c56e0db32 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5945,7 +5945,7 @@ static void packImageA16AddressToDwords(SelectionDAG &DAG, SDValue Op, SDValue SITargetLowering::lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr, - SelectionDAG &DAG) const { + SelectionDAG &DAG, bool WithChain) const { SDLoc DL(Op); MachineFunction &MF = DAG.getMachineFunction(); const GCNSubtarget* ST = &MF.getSubtarget(); @@ -5968,7 +5968,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op, int NumVDataDwords; bool AdjustRetType = false; - unsigned AddrIdx; // Index of first address argument + // Offset of intrinsic arguments + const unsigned ArgOffset = WithChain ? 2 : 1; + unsigned DMask; unsigned DMaskLanes = 0; @@ -5986,15 +5988,13 @@ SDValue SITargetLowering::lowerImage(SDValue Op, ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32; DMask = Is64Bit ? 0xf : 0x3; NumVDataDwords = Is64Bit ? 4 : 2; - AddrIdx = 4; } else { DMask = Is64Bit ? 0x3 : 0x1; NumVDataDwords = Is64Bit ? 
2 : 1; - AddrIdx = 3; } } else { - unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa(Op) ? 2 : 1; - auto DMaskConst = cast(Op.getOperand(DMaskIdx)); + auto *DMaskConst = + cast(Op.getOperand(ArgOffset + Intr->DMaskIndex)); DMask = DMaskConst->getZExtValue(); DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask); @@ -6034,56 +6034,45 @@ SDValue SITargetLowering::lowerImage(SDValue Op, AdjustRetType = true; } - - AddrIdx = DMaskIdx + 1; } - unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0; - unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0; - unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0; - unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients + - NumCoords + NumLCM; - unsigned NumMIVAddrs = NumVAddrs; - + unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd; SmallVector VAddrs; // Optimize _L to _LZ when _L is zero if (LZMappingInfo) { - if (auto ConstantLod = - dyn_cast(Op.getOperand(AddrIdx+NumVAddrs-1))) { + if (auto *ConstantLod = dyn_cast( + Op.getOperand(ArgOffset + Intr->LodIndex))) { if (ConstantLod->isZero() || ConstantLod->isNegative()) { IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l - NumMIVAddrs--; // remove 'lod' + VAddrEnd--; // remove 'lod' } } } // Optimize _mip away, when 'lod' is zero if (MIPMappingInfo) { - if (auto ConstantLod = - dyn_cast(Op.getOperand(AddrIdx+NumVAddrs-1))) { + if (auto *ConstantLod = dyn_cast( + Op.getOperand(ArgOffset + Intr->MipIndex))) { if (ConstantLod->isNullValue()) { IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip - NumMIVAddrs--; // remove 'lod' + VAddrEnd--; // remove 'mip' } } } // Push back extra arguments. - for (unsigned I = 0; I < BaseOpcode->NumExtraArgs; I++) - VAddrs.push_back(Op.getOperand(AddrIdx + I)); + for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) + VAddrs.push_back(Op.getOperand(ArgOffset + I)); // Check for 16 bit addresses or derivatives and pack if true. - unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs; - unsigned CoordIdx = DimIdx + NumGradients; - unsigned CoordsEnd = AddrIdx + NumMIVAddrs; - - MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType(); + MVT VAddrVT = + Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType(); MVT VAddrScalarVT = VAddrVT.getScalarType(); MVT PackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16; IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16; - VAddrVT = Op.getOperand(CoordIdx).getSimpleValueType(); + VAddrVT = Op.getOperand(ArgOffset + Intr->CoordStart).getSimpleValueType(); VAddrScalarVT = VAddrVT.getScalarType(); IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16; if (IsA16 || IsG16) { @@ -6118,17 +6107,18 @@ SDValue SITargetLowering::lowerImage(SDValue Op, } // Don't compress addresses for G16 - const int PackEndIdx = IsA16 ? CoordsEnd : CoordIdx; - packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs, DimIdx, - PackEndIdx, NumGradients); + const int PackEndIdx = IsA16 ? 
VAddrEnd : (ArgOffset + Intr->CoordStart); + packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs, + ArgOffset + Intr->GradientStart, PackEndIdx, + Intr->NumGradients); if (!IsA16) { // Add uncompressed address - for (unsigned I = CoordIdx; I < CoordsEnd; I++) + for (unsigned I = ArgOffset + Intr->CoordStart; I < VAddrEnd; I++) VAddrs.push_back(Op.getOperand(I)); } } else { - for (unsigned I = DimIdx; I < CoordsEnd; I++) + for (unsigned I = ArgOffset + Intr->GradientStart; I < VAddrEnd; I++) VAddrs.push_back(Op.getOperand(I)); } @@ -6151,22 +6141,19 @@ SDValue SITargetLowering::lowerImage(SDValue Op, SDValue True = DAG.getTargetConstant(1, DL, MVT::i1); SDValue False = DAG.getTargetConstant(0, DL, MVT::i1); - unsigned CtrlIdx; // Index of texfailctrl argument SDValue Unorm; if (!BaseOpcode->Sampler) { Unorm = True; - CtrlIdx = AddrIdx + NumVAddrs + 1; } else { auto UnormConst = - cast(Op.getOperand(AddrIdx + NumVAddrs + 2)); + cast(Op.getOperand(ArgOffset + Intr->UnormIndex)); Unorm = UnormConst->getZExtValue() ? True : False; - CtrlIdx = AddrIdx + NumVAddrs + 3; } SDValue TFE; SDValue LWE; - SDValue TexFail = Op.getOperand(CtrlIdx); + SDValue TexFail = Op.getOperand(ArgOffset + Intr->TexFailCtrlIndex); bool IsTexFail = false; if (!parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail)) return Op; @@ -6213,12 +6200,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op, SDValue DLC; if (BaseOpcode->Atomic) { GLC = True; // TODO no-return optimization - if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC, - IsGFX10 ? &DLC : nullptr)) + if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex), + DAG, nullptr, &SLC, IsGFX10 ? &DLC : nullptr)) return Op; } else { - if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC, - IsGFX10 ? &DLC : nullptr)) + if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex), + DAG, &GLC, &SLC, IsGFX10 ? 
&DLC : nullptr))
       return Op;
   }

@@ -6231,9 +6218,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
   } else {
     Ops.push_back(VAddr);
   }
-  Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc
+  Ops.push_back(Op.getOperand(ArgOffset + Intr->RsrcIndex));
   if (BaseOpcode->Sampler)
-    Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler
+    Ops.push_back(Op.getOperand(ArgOffset + Intr->SampIndex));
   Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
   if (IsGFX10)
     Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
@@ -6714,7 +6701,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   default:
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
-      return lowerImage(Op, ImageDimIntr, DAG);
+      return lowerImage(Op, ImageDimIntr, DAG, false);
     return Op;
   }
@@ -7376,7 +7363,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   default:
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrID))
-      return lowerImage(Op, ImageDimIntr, DAG);
+      return lowerImage(Op, ImageDimIntr, DAG, true);
     return SDValue();
   }
@@ -7716,7 +7703,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
-      return lowerImage(Op, ImageDimIntr, DAG);
+      return lowerImage(Op, ImageDimIntr, DAG, true);
     return Op;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 6bfa33cef7ced..9aa307f7bc594 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -59,7 +59,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
                                  MVT VT, unsigned Offset) const;
   SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
-                     SelectionDAG &DAG) const;
+                     SelectionDAG &DAG, bool WithChain) const;
   SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
                        SDValue CachePolicy, SelectionDAG &DAG) const;

From 71cf97e95b8c888367284d1d12925f79b38034eb Mon Sep 17 00:00:00 2001
From: David Spickett
Date: Thu, 24 Sep 2020 14:50:41 +0100
Subject: [PATCH 015/321] Reland "[lldb] Don't send invalid region addresses to lldb server"

This reverts commit c65627a1fe3be7521fc232d633bb6df577f55269.

The test immediately after the new invalid symbol test was failing on
Windows. This was because when we called VirtualQueryEx to get the
region info for 0x0, we would call GetLastError even when
VirtualQueryEx had succeeded. That picked up a stale error code set
while trying to look up "not_an_address", which happened to be 2
("The system cannot find the file specified.").

To fix this, only call GetLastError when we know VirtualQueryEx has
failed (when it returns 0, which we were already checking for anyway).

Also convert the memory region command to an early-return style to make
the logic clearer.
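
As a minimal standalone sketch of the Win32 pattern this fix applies
(illustrative only, not the lldb code itself; the handle and address
here are placeholder values, and the real change is in
ProcessDebugger::GetMemoryRegionInfo below):

  #include <windows.h>

  #include <cstdint>
  #include <cstdio>

  int main() {
    MEMORY_BASIC_INFORMATION mem_info = {};
    // Placeholders: a debugger would use the debuggee's handle and a
    // caller-supplied address here.
    HANDLE handle = ::GetCurrentProcess();
    LPCVOID addr =
        reinterpret_cast<LPCVOID>(static_cast<std::uintptr_t>(-1));
    SIZE_T result =
        ::VirtualQueryEx(handle, addr, &mem_info, sizeof(mem_info));
    if (result == 0) {
      // GetLastError() reports the most recent failing Win32 call on this
      // thread, so capture it once, immediately after the failing call,
      // before anything else can overwrite it with an unrelated code
      // (such as 2, "file not found").
      DWORD last_error = ::GetLastError();
      if (last_error == ERROR_INVALID_PARAMETER)
        std::puts("address is past the highest accessible address");
      else
        std::printf("VirtualQueryEx failed: Win32 error %lu\n", last_error);
    }
    return 0;
  }
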
Reviewed By: labath, stella.stamenova Differential Revision: https://reviews.llvm.org/D88229 --- lldb/source/Commands/CommandObjectMemory.cpp | 103 +++++++++--------- .../Windows/Common/ProcessDebugger.cpp | 6 +- .../memory-region/TestMemoryRegion.py | 6 + 3 files changed, 62 insertions(+), 53 deletions(-) diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp index 474c377101493..d4c2808dc159e 100644 --- a/lldb/source/Commands/CommandObjectMemory.cpp +++ b/lldb/source/Commands/CommandObjectMemory.cpp @@ -1687,63 +1687,66 @@ class CommandObjectMemoryRegion : public CommandObjectParsed { protected: bool DoExecute(Args &command, CommandReturnObject &result) override { ProcessSP process_sp = m_exe_ctx.GetProcessSP(); - if (process_sp) { - Status error; - lldb::addr_t load_addr = m_prev_end_addr; + if (!process_sp) { m_prev_end_addr = LLDB_INVALID_ADDRESS; + result.AppendError("invalid process"); + result.SetStatus(eReturnStatusFailed); + return false; + } + + Status error; + lldb::addr_t load_addr = m_prev_end_addr; + m_prev_end_addr = LLDB_INVALID_ADDRESS; - const size_t argc = command.GetArgumentCount(); - if (argc > 1 || (argc == 0 && load_addr == LLDB_INVALID_ADDRESS)) { - result.AppendErrorWithFormat("'%s' takes one argument:\nUsage: %s\n", - m_cmd_name.c_str(), m_cmd_syntax.c_str()); + const size_t argc = command.GetArgumentCount(); + if (argc > 1 || (argc == 0 && load_addr == LLDB_INVALID_ADDRESS)) { + result.AppendErrorWithFormat("'%s' takes one argument:\nUsage: %s\n", + m_cmd_name.c_str(), m_cmd_syntax.c_str()); + result.SetStatus(eReturnStatusFailed); + return false; + } + + if (argc == 1) { + auto load_addr_str = command[0].ref(); + load_addr = OptionArgParser::ToAddress(&m_exe_ctx, load_addr_str, + LLDB_INVALID_ADDRESS, &error); + if (error.Fail() || load_addr == LLDB_INVALID_ADDRESS) { + result.AppendErrorWithFormat("invalid address argument \"%s\": %s\n", + command[0].c_str(), error.AsCString()); result.SetStatus(eReturnStatusFailed); - } else { - if (command.GetArgumentCount() == 1) { - auto load_addr_str = command[0].ref(); - load_addr = OptionArgParser::ToAddress(&m_exe_ctx, load_addr_str, - LLDB_INVALID_ADDRESS, &error); - if (error.Fail() || load_addr == LLDB_INVALID_ADDRESS) { - result.AppendErrorWithFormat( - "invalid address argument \"%s\": %s\n", command[0].c_str(), - error.AsCString()); - result.SetStatus(eReturnStatusFailed); - } - } + return false; + } + } - lldb_private::MemoryRegionInfo range_info; - error = process_sp->GetMemoryRegionInfo(load_addr, range_info); - if (error.Success()) { - lldb_private::Address addr; - ConstString name = range_info.GetName(); - ConstString section_name; - if (process_sp->GetTarget().ResolveLoadAddress(load_addr, addr)) { - SectionSP section_sp(addr.GetSection()); - if (section_sp) { - // Got the top most section, not the deepest section - while (section_sp->GetParent()) - section_sp = section_sp->GetParent(); - section_name = section_sp->GetName(); - } - } - result.AppendMessageWithFormatv( - "[{0:x16}-{1:x16}) {2:r}{3:w}{4:x}{5}{6}{7}{8}\n", - range_info.GetRange().GetRangeBase(), - range_info.GetRange().GetRangeEnd(), range_info.GetReadable(), - range_info.GetWritable(), range_info.GetExecutable(), - name ? " " : "", name, section_name ? 
" " : "", section_name); - m_prev_end_addr = range_info.GetRange().GetRangeEnd(); - result.SetStatus(eReturnStatusSuccessFinishResult); - } else { - result.SetStatus(eReturnStatusFailed); - result.AppendErrorWithFormat("%s\n", error.AsCString()); + lldb_private::MemoryRegionInfo range_info; + error = process_sp->GetMemoryRegionInfo(load_addr, range_info); + if (error.Success()) { + lldb_private::Address addr; + ConstString name = range_info.GetName(); + ConstString section_name; + if (process_sp->GetTarget().ResolveLoadAddress(load_addr, addr)) { + SectionSP section_sp(addr.GetSection()); + if (section_sp) { + // Got the top most section, not the deepest section + while (section_sp->GetParent()) + section_sp = section_sp->GetParent(); + section_name = section_sp->GetName(); } } - } else { - m_prev_end_addr = LLDB_INVALID_ADDRESS; - result.AppendError("invalid process"); - result.SetStatus(eReturnStatusFailed); + result.AppendMessageWithFormatv( + "[{0:x16}-{1:x16}) {2:r}{3:w}{4:x}{5}{6}{7}{8}\n", + range_info.GetRange().GetRangeBase(), + range_info.GetRange().GetRangeEnd(), range_info.GetReadable(), + range_info.GetWritable(), range_info.GetExecutable(), name ? " " : "", + name, section_name ? " " : "", section_name); + m_prev_end_addr = range_info.GetRange().GetRangeEnd(); + result.SetStatus(eReturnStatusSuccessFinishResult); + return true; } - return result.Succeeded(); + + result.SetStatus(eReturnStatusFailed); + result.AppendErrorWithFormat("%s\n", error.AsCString()); + return false; } const char *GetRepeatCommand(Args ¤t_command_args, diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp b/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp index 07a81cdf69ccd..91b3311dc8575 100644 --- a/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp @@ -405,7 +405,8 @@ Status ProcessDebugger::GetMemoryRegionInfo(lldb::addr_t vm_addr, MEMORY_BASIC_INFORMATION mem_info = {}; SIZE_T result = ::VirtualQueryEx(handle, addr, &mem_info, sizeof(mem_info)); if (result == 0) { - if (::GetLastError() == ERROR_INVALID_PARAMETER) { + DWORD last_error = ::GetLastError(); + if (last_error == ERROR_INVALID_PARAMETER) { // ERROR_INVALID_PARAMETER is returned if VirtualQueryEx is called with // an address past the highest accessible address. 
We should return a // range from the vm_addr to LLDB_INVALID_ADDRESS @@ -417,7 +418,7 @@ Status ProcessDebugger::GetMemoryRegionInfo(lldb::addr_t vm_addr, info.SetMapped(MemoryRegionInfo::eNo); return error; } else { - error.SetError(::GetLastError(), eErrorTypeWin32); + error.SetError(last_error, eErrorTypeWin32); LLDB_LOG(log, "VirtualQueryEx returned error {0} while getting memory " "region info for address {1:x}", @@ -460,7 +461,6 @@ Status ProcessDebugger::GetMemoryRegionInfo(lldb::addr_t vm_addr, info.SetMapped(MemoryRegionInfo::eNo); } - error.SetError(::GetLastError(), eErrorTypeWin32); LLDB_LOGV(log, "Memory region info for address {0}: readable={1}, " "executable={2}, writable={3}", diff --git a/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py b/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py index 283cc945ed09a..61e64d44e7945 100644 --- a/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py +++ b/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py @@ -41,6 +41,12 @@ def test(self): self.assertFalse(result.Succeeded()) self.assertRegexpMatches(result.GetError(), "Usage: memory region ADDR") + # Test that when the address fails to parse, we show an error and do not continue + interp.HandleCommand("memory region not_an_address", result) + self.assertFalse(result.Succeeded()) + self.assertEqual(result.GetError(), + "error: invalid address argument \"not_an_address\": address expression \"not_an_address\" evaluation failed\n") + # Now let's print the memory region starting at 0 which should always work. interp.HandleCommand("memory region 0x0", result) self.assertTrue(result.Succeeded()) From 7f05fe1aeeb005b552c6a3093b61659e7b578b14 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 5 Oct 2020 12:52:03 +0200 Subject: [PATCH 016/321] [AST][RecoveryExpr] Fix a crash on undeduced type. We should not capture the type if the function return type is undeduced. 
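
For context, a minimal reproducer of the crash, mirroring the regression
tests added below (all names are illustrative only):

  auto f();                        // return type not deduced yet
  int f(double);                   // not viable for a two-argument call
  // No candidate is viable, so clang builds a RecoveryExpr for the call.
  // Its type must not be taken from the undeduced 'auto' candidate;
  // otherwise a later query such as sizeof(f(0, 0)) crashes.
  int unknown_type_call = f(0, 0);
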
Reviewed By: adamcz Differential Revision: https://reviews.llvm.org/D87350 --- clang/lib/Sema/SemaOverload.cpp | 7 ++++++- clang/test/AST/ast-dump-recovery.cpp | 6 ++++++ clang/test/SemaCXX/recovery-expr-type.cpp | 6 ++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 0c252a488fea3..4696ed56dc71d 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -12880,7 +12880,12 @@ static QualType chooseRecoveryType(OverloadCandidateSet &CS, for (const auto &C : CS) ConsiderCandidate(C); - return Result.getValueOr(QualType()); + if (!Result) + return QualType(); + auto Value = Result.getValue(); + if (Value.isNull() || Value->isUndeducedType()) + return QualType(); + return Value; } /// FinishOverloadedCallExpr - given an OverloadCandidateSet, builds and returns diff --git a/clang/test/AST/ast-dump-recovery.cpp b/clang/test/AST/ast-dump-recovery.cpp index fd7c923b7e51e..69d5f80427cb0 100644 --- a/clang/test/AST/ast-dump-recovery.cpp +++ b/clang/test/AST/ast-dump-recovery.cpp @@ -126,6 +126,12 @@ void test(int x) { // CHECK-NEXT:| `-UnresolvedLookupExpr {{.*}} 'invalid' struct alignas(invalid()) Aligned {}; +auto f(); +int f(double); +// CHECK: VarDecl {{.*}} unknown_type_call 'int' +// CHECK-NEXT: `-RecoveryExpr {{.*}} '' +int unknown_type_call = f(0, 0); + void InvalidInitalizer(int x) { struct Bar { Bar(); }; // CHECK: `-VarDecl {{.*}} a1 'Bar' diff --git a/clang/test/SemaCXX/recovery-expr-type.cpp b/clang/test/SemaCXX/recovery-expr-type.cpp index 075d0147c6840..8186a812790d3 100644 --- a/clang/test/SemaCXX/recovery-expr-type.cpp +++ b/clang/test/SemaCXX/recovery-expr-type.cpp @@ -105,3 +105,9 @@ typedef int arr[]; int v = arr(); // expected-error {{array types cannot be value-initialized}} \ expected-error {{cannot initialize a variable of type 'int' with an rvalue of type 'test8::arr'}} } + +namespace test9 { +auto f(); // expected-note {{candidate function not viable}} +// verify no crash on evaluating the size of undeduced auto type. +static_assert(sizeof(f(1)), ""); // expected-error {{no matching function for call to 'f'}} +} From 96c8a17c800b2370f6d43fe67559ca10d5e44196 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 5 Oct 2020 13:14:53 +0200 Subject: [PATCH 017/321] [clangd] Remove unused using-decls in TypeHierarchyTests, NFC. --- clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp index 9f021ade02786..64831724d1be6 100644 --- a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp +++ b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp @@ -29,11 +29,8 @@ namespace { using ::testing::AllOf; using ::testing::ElementsAre; -using ::testing::Eq; using ::testing::Field; -using ::testing::IsEmpty; using ::testing::Matcher; -using ::testing::Pointee; using ::testing::UnorderedElementsAre; // GMock helpers for matching TypeHierarchyItem. From e70e7d1019ca8562b614a67c26995da42c0336ad Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 5 Oct 2020 14:23:41 +0300 Subject: [PATCH 018/321] [TableGen] Added a function for identification of unsupported opcodes. This change implements generation of a function which may be used by a backend to check if a given instruction is supported for a specific subtarget. 
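
As a hedged sketch of how a backend could consume the generated checker
(the "Foo" target name and the .inc file name are placeholders; the
emitted symbol is <Target>CheckMnemonic, guarded by GET_MNEMONIC_CHECKER
as emitted below):

  #define GET_MNEMONIC_CHECKER
  #include "FooGenAsmMatcher.inc" // provides static FooCheckMnemonic(...)

  // Returns true if any instruction with this mnemonic is available for
  // the given subtarget features, using the default asm variant (ID 0).
  static bool isMnemonicSupported(llvm::StringRef Mnemonic,
                                  const llvm::FeatureBitset &Features) {
    return FooCheckMnemonic(Mnemonic, Features, /*VariantID=*/0);
  }
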
Reviewers: sdesmalen Differential Revision: https://reviews.llvm.org/D88214 --- llvm/utils/TableGen/AsmMatcherEmitter.cpp | 69 +++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 5b722b20e048d..3f05ab5b9955c 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -3094,6 +3094,67 @@ static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target, OS << "\n"; } +static void emitMnemonicChecker(raw_ostream &OS, + CodeGenTarget &Target, + unsigned VariantCount, + bool HasMnemonicFirst, + bool HasMnemonicAliases) { + OS << "static bool " << Target.getName() + << "CheckMnemonic(StringRef Mnemonic,\n"; + OS << " " + << "const FeatureBitset &AvailableFeatures,\n"; + OS << " " + << "unsigned VariantID) {\n"; + + if (!VariantCount) { + OS << " return false;\n"; + } else { + if (HasMnemonicAliases) { + OS << " // Process all MnemonicAliases to remap the mnemonic.\n"; + OS << " applyMnemonicAliases(Mnemonic, AvailableFeatures, VariantID);"; + OS << "\n\n"; + } + OS << " // Find the appropriate table for this asm variant.\n"; + OS << " const MatchEntry *Start, *End;\n"; + OS << " switch (VariantID) {\n"; + OS << " default: llvm_unreachable(\"invalid variant!\");\n"; + for (unsigned VC = 0; VC != VariantCount; ++VC) { + Record *AsmVariant = Target.getAsmParserVariant(VC); + int AsmVariantNo = AsmVariant->getValueAsInt("Variant"); + OS << " case " << AsmVariantNo << ": Start = std::begin(MatchTable" << VC + << "); End = std::end(MatchTable" << VC << "); break;\n"; + } + OS << " }\n\n"; + + OS << " // Search the table.\n"; + if (HasMnemonicFirst) { + OS << " auto MnemonicRange = " + "std::equal_range(Start, End, Mnemonic, LessOpcode());\n\n"; + } else { + OS << " auto MnemonicRange = std::make_pair(Start, End);\n"; + OS << " unsigned SIndex = Mnemonic.empty() ? 0 : 1;\n"; + OS << " if (!Mnemonic.empty())\n"; + OS << " MnemonicRange = " + << "std::equal_range(Start, End, Mnemonic.lower(), LessOpcode());\n\n"; + } + + OS << " if (MnemonicRange.first == MnemonicRange.second)\n"; + OS << " return false;\n\n"; + + OS << " for (const MatchEntry *it = MnemonicRange.first, " + << "*ie = MnemonicRange.second;\n"; + OS << " it != ie; ++it) {\n"; + OS << " const FeatureBitset &RequiredFeatures =\n"; + OS << " FeatureBitsets[it->RequiredFeaturesIdx];\n"; + OS << " if ((AvailableFeatures & RequiredFeatures) == "; + OS << "RequiredFeatures)\n"; + OS << " return true;\n"; + OS << " }\n"; + OS << " return false;\n"; + } + OS << "}\n"; + OS << "\n"; +} // Emit a function mapping match classes to strings, for debugging. 
static void emitMatchClassKindNames(std::forward_list &Infos, @@ -3909,6 +3970,14 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { emitMnemonicSpellChecker(OS, Target, VariantCount); OS << "#endif // GET_MNEMONIC_SPELL_CHECKER\n\n"; + + OS << "\n#ifdef GET_MNEMONIC_CHECKER\n"; + OS << "#undef GET_MNEMONIC_CHECKER\n\n"; + + emitMnemonicChecker(OS, Target, VariantCount, + HasMnemonicFirst, HasMnemonicAliases); + + OS << "#endif // GET_MNEMONIC_CHECKER\n\n"; } namespace llvm { From 707c3d4d4210f5386a79ef7a7e771f36bc7ad9ef Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Mon, 5 Oct 2020 20:26:27 +0900 Subject: [PATCH 019/321] [AMDGPU][RegAlloc][SplitKit] Pre-commit test for D88821 --- .../CodeGen/AMDGPU/splitkit-copy-bundle.mir | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir index c9f3a82cf695f..423eb928b4817 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -238,3 +238,108 @@ body: | S_BRANCH %bb.2 ... + +--- +name: splitkit_copy_unbundle_reorder +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + ; RA-LABEL: name: splitkit_copy_unbundle_reorder + ; RA: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; RA: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; RA: [[DEF2:%[0-9]+]]:sgpr_512 = IMPLICIT_DEF + ; RA: [[DEF2]].sub4:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub5:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub10:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub11:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub7:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub8:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub13:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub14:sgpr_512 = S_MOV_B32 -1 + ; RA: undef %15.sub4_sub5:sgpr_512 = COPY [[DEF2]].sub4_sub5 { + ; RA: internal %15.sub10_sub11:sgpr_512 = COPY [[DEF2]].sub10_sub11 + ; RA: internal %15.sub7:sgpr_512 = COPY [[DEF2]].sub7 + ; RA: internal %15.sub8:sgpr_512 = COPY [[DEF2]].sub8 + ; RA: internal %15.sub13:sgpr_512 = COPY [[DEF2]].sub13 + ; RA: internal %15.sub14:sgpr_512 = COPY [[DEF2]].sub14 + ; RA: } + ; RA: SI_SPILL_S512_SAVE %15, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 64 into %stack.0, align 4, addrspace 5) + ; RA: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 + ; RA: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 64 from %stack.0, align 4, addrspace 5) + ; RA: undef %14.sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 { + ; RA: internal %14.sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11 + ; RA: internal %14.sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7 + ; RA: internal %14.sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8 + ; RA: internal %14.sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13 + ; RA: internal 
%14.sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14 + ; RA: } + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub4, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub5, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub10, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub11, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub7, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub8, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub13, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub14, 0, 0 :: (dereferenceable invariant load 4) + ; RA: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR]], implicit [[S_BUFFER_LOAD_DWORD_SGPR1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR2]], implicit [[S_BUFFER_LOAD_DWORD_SGPR3]], implicit [[S_BUFFER_LOAD_DWORD_SGPR4]], implicit [[S_BUFFER_LOAD_DWORD_SGPR5]], implicit [[S_BUFFER_LOAD_DWORD_SGPR6]], implicit [[S_BUFFER_LOAD_DWORD_SGPR7]] + ; VR-LABEL: name: splitkit_copy_unbundle_reorder + ; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; VR: renamable $sgpr16 = S_MOV_B32 -1 + ; VR: renamable $sgpr17 = S_MOV_B32 -1 + ; VR: renamable $sgpr22 = S_MOV_B32 -1 + ; VR: renamable $sgpr23 = S_MOV_B32 -1 + ; VR: renamable $sgpr19 = S_MOV_B32 -1 + ; VR: renamable $sgpr20 = S_MOV_B32 -1 + ; VR: renamable $sgpr25 = S_MOV_B32 -1 + ; VR: renamable $sgpr26 = S_MOV_B32 -1 + ; VR: SI_SPILL_S512_SAVE killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 64 into %stack.0, align 4, addrspace 5) + ; VR: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 + ; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 64 from %stack.0, align 4, addrspace 5) + ; VR: renamable $sgpr16 = COPY killed renamable $sgpr20 + ; VR: renamable $sgpr15 = COPY killed renamable $sgpr19 + ; VR: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr22_sgpr23 + ; VR: renamable $sgpr21 = COPY killed renamable $sgpr25 + ; 
VR: renamable $sgpr12_sgpr13 = COPY renamable $sgpr16_sgpr17 + ; VR: renamable $sgpr22 = COPY killed renamable $sgpr26 + ; VR: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = IMPLICIT_DEF + ; VR: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr12, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr9 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr13, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr14 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr15, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr15 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr16, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr10_sgpr11 = IMPLICIT_DEF + ; VR: renamable $sgpr12 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr18, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr13 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr19, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr16 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr21, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr17 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr22, 0, 0 :: (dereferenceable invariant load 4) + ; VR: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr10_sgpr11, implicit killed renamable $sgpr8, implicit killed renamable $sgpr9, implicit killed renamable $sgpr12, implicit killed renamable $sgpr13, implicit killed renamable $sgpr14, implicit killed renamable $sgpr15, implicit killed renamable $sgpr16, implicit killed renamable $sgpr17 + %0:sgpr_128 = IMPLICIT_DEF + %1:sreg_64 = IMPLICIT_DEF + %2:sgpr_512 = IMPLICIT_DEF + + %2.sub4:sgpr_512 = S_MOV_B32 -1 + %2.sub5:sgpr_512 = S_MOV_B32 -1 + %2.sub10:sgpr_512 = S_MOV_B32 -1 + %2.sub11:sgpr_512 = S_MOV_B32 -1 + %2.sub7:sgpr_512 = S_MOV_B32 -1 + %2.sub8:sgpr_512 = S_MOV_B32 -1 + %2.sub13:sgpr_512 = S_MOV_B32 -1 + %2.sub14:sgpr_512 = S_MOV_B32 -1 + + ; Clobber registers + S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 + + %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub4:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub5:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub10:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub11:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub7:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %10:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub8:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + 
%11:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub13:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub14:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + + S_NOP 0, implicit %0, implicit %1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12 + +... From 007dd12d546814977519b33ca38b1cc8b31fee26 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Thu, 1 Oct 2020 17:45:57 +0200 Subject: [PATCH 020/321] [ASTImporter][AST] Fix structural equivalency crash on dependent FieldDecl Differential Revision: https://reviews.llvm.org/D88665 --- clang/lib/AST/ASTStructuralEquivalence.cpp | 45 ++----------------- clang/test/ASTMerge/struct/test.c | 12 +---- .../AST/StructuralEquivalenceTest.cpp | 33 ++++++++++++++ 3 files changed, 37 insertions(+), 53 deletions(-) diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index 98e1b7eeb8c4c..2bc5f39b817e7 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -1256,48 +1256,9 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, return false; } - if (Field1->isBitField() != Field2->isBitField()) { - if (Context.Complain) { - Context.Diag2( - Owner2->getLocation(), - Context.getApplicableDiagnostic(diag::err_odr_tag_type_inconsistent)) - << Context.ToCtx.getTypeDeclType(Owner2); - if (Field1->isBitField()) { - Context.Diag1(Field1->getLocation(), diag::note_odr_bit_field) - << Field1->getDeclName() << Field1->getType() - << Field1->getBitWidthValue(Context.FromCtx); - Context.Diag2(Field2->getLocation(), diag::note_odr_not_bit_field) - << Field2->getDeclName(); - } else { - Context.Diag2(Field2->getLocation(), diag::note_odr_bit_field) - << Field2->getDeclName() << Field2->getType() - << Field2->getBitWidthValue(Context.ToCtx); - Context.Diag1(Field1->getLocation(), diag::note_odr_not_bit_field) - << Field1->getDeclName(); - } - } - return false; - } - - if (Field1->isBitField()) { - // Make sure that the bit-fields are the same length. - unsigned Bits1 = Field1->getBitWidthValue(Context.FromCtx); - unsigned Bits2 = Field2->getBitWidthValue(Context.ToCtx); - - if (Bits1 != Bits2) { - if (Context.Complain) { - Context.Diag2(Owner2->getLocation(), - Context.getApplicableDiagnostic( - diag::err_odr_tag_type_inconsistent)) - << Context.ToCtx.getTypeDeclType(Owner2); - Context.Diag2(Field2->getLocation(), diag::note_odr_bit_field) - << Field2->getDeclName() << Field2->getType() << Bits2; - Context.Diag1(Field1->getLocation(), diag::note_odr_bit_field) - << Field1->getDeclName() << Field1->getType() << Bits1; - } - return false; - } - } + if (Field1->isBitField()) + return IsStructurallyEquivalent(Context, Field1->getBitWidth(), + Field2->getBitWidth()); return true; } diff --git a/clang/test/ASTMerge/struct/test.c b/clang/test/ASTMerge/struct/test.c index 9ac66d17f60ef..3e11dd9e07a85 100644 --- a/clang/test/ASTMerge/struct/test.c +++ b/clang/test/ASTMerge/struct/test.c @@ -21,16 +21,6 @@ // CHECK: struct1.c:27:8: note: no corresponding field here // CHECK: struct2.c:24:31: warning: external variable 'x4' declared with incompatible types in different translation units ('struct S4' vs. 
'struct S4') // CHECK: struct1.c:27:22: note: declared here with type 'struct S4' -// CHECK: struct1.c:33:8: warning: type 'struct S6' has incompatible definitions in different translation units -// CHECK: struct1.c:33:33: note: bit-field 'j' with type 'unsigned int' and length 8 here -// CHECK: struct2.c:30:33: note: field 'j' is not a bit-field -// CHECK: struct2.c:30:38: warning: external variable 'x6' declared with incompatible types in different translation units ('struct S6' vs. 'struct S6') -// CHECK: struct1.c:33:42: note: declared here with type 'struct S6' -// CHECK: struct1.c:36:8: warning: type 'struct S7' has incompatible definitions in different translation units -// CHECK: struct1.c:36:33: note: bit-field 'j' with type 'unsigned int' and length 8 here -// CHECK: struct2.c:33:33: note: bit-field 'j' with type 'unsigned int' and length 16 here -// CHECK: struct2.c:33:43: warning: external variable 'x7' declared with incompatible types in different translation units ('struct S7' vs. 'struct S7') -// CHECK: struct1.c:36:42: note: declared here with type 'struct S7' // CHECK: struct1.c:56:10: warning: type 'struct DeeperError' has incompatible definitions in different translation units // CHECK: struct1.c:56:35: note: field 'f' has type 'int' here // CHECK: struct2.c:53:37: note: field 'f' has type 'float' here @@ -52,4 +42,4 @@ // CHECK: struct2.c:129:9: note: field 'S' has type 'struct (anonymous struct at [[PATH_TO_INPUTS]]struct2.c:127:7)' here // CHECK: struct2.c:138:3: warning: external variable 'x16' declared with incompatible types in different translation units ('struct DeepUnnamedError' vs. 'struct DeepUnnamedError') // CHECK: struct1.c:141:3: note: declared here with type 'struct DeepUnnamedError' -// CHECK: 20 warnings generated +// CHECK: 17 warnings generated diff --git a/clang/unittests/AST/StructuralEquivalenceTest.cpp b/clang/unittests/AST/StructuralEquivalenceTest.cpp index c83bf019bb65e..26bd81ce13191 100644 --- a/clang/unittests/AST/StructuralEquivalenceTest.cpp +++ b/clang/unittests/AST/StructuralEquivalenceTest.cpp @@ -976,6 +976,39 @@ TEST_F(StructuralEquivalenceTemplateTest, DifferentTemplateArgKind) { EXPECT_FALSE(testStructuralMatch(t)); } +TEST_F(StructuralEquivalenceTemplateTest, BitFieldDecl) { + const char *Code = "class foo { int a : 2; };"; + auto t = makeNamedDecls(Code, Code, Lang_CXX03); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceTemplateTest, BitFieldDeclDifferentWidth) { + auto t = makeNamedDecls("class foo { int a : 2; };", + "class foo { int a : 4; };", Lang_CXX03); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceTemplateTest, DependentBitFieldDecl) { + const char *Code = "template class foo { int a : sizeof(T); };"; + auto t = makeNamedDecls(Code, Code, Lang_CXX03); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceTemplateTest, DependentBitFieldDeclDifferentVal) { + auto t = makeNamedDecls( + "template class foo { int a : sizeof(A); };", + "template class foo { int a : sizeof(B); };", + Lang_CXX03); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceTemplateTest, DependentBitFieldDeclDifferentVal2) { + auto t = makeNamedDecls( + "template class foo { int a : sizeof(A); };", + "template class foo { int a : sizeof(A) + 1; };", Lang_CXX03); + EXPECT_FALSE(testStructuralMatch(t)); +} + TEST_F(StructuralEquivalenceTemplateTest, ExplicitBoolSame) { auto Decls = makeNamedDecls( "template struct foo {explicit(b) foo(int);};", From 
2cd7b0e1305a71f4f9d89f25b140da641f2693f4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 5 Oct 2020 13:45:27 +0100 Subject: [PATCH 021/321] [ValueTracking] canCreateUndefOrPoison - use APInt to check bounds instead of getZExtValue(). Fixes OSS Fuzz #26135 --- llvm/lib/Analysis/ValueTracking.cpp | 5 ++--- llvm/test/Transforms/InstCombine/shift.ll | 26 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 11eb5f303c550..e78beb04e5eaf 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4813,7 +4813,7 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) { bool Safe = llvm::all_of(ShiftAmounts, [](Constant *C) { auto *CI = dyn_cast(C); - return CI && CI->getZExtValue() < C->getType()->getIntegerBitWidth(); + return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth()); }); return !Safe; } @@ -4836,8 +4836,7 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) { auto *VTy = cast(Op->getOperand(0)->getType()); unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1; auto *Idx = dyn_cast(Op->getOperand(IdxOp)); - if (!Idx || - Idx->getZExtValue() >= VTy->getElementCount().getKnownMinValue()) + if (!Idx || Idx->getValue().uge(VTy->getElementCount().getKnownMinValue())) return true; return false; } diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 714a08d247a24..ea0954c02b4e6 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -1684,3 +1684,29 @@ define void @ashr_out_of_range(i177* %A) { ret void } +; OSS Fuzz #26135 +; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26135 +define void @ashr_out_of_range_1(i177* %A) { +; CHECK-LABEL: @ashr_out_of_range_1( +; CHECK-NEXT: [[L:%.*]] = load i177, i177* [[A:%.*]], align 4 +; CHECK-NEXT: [[G11:%.*]] = getelementptr i177, i177* [[A]], i64 -1 +; CHECK-NEXT: [[B24_LOBIT:%.*]] = ashr i177 [[L]], 175 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i177 [[B24_LOBIT]] to i64 +; CHECK-NEXT: [[G62:%.*]] = getelementptr i177, i177* [[G11]], i64 [[TMP1]] +; CHECK-NEXT: store i177 0, i177* [[G62]], align 4 +; CHECK-NEXT: ret void +; + %L = load i177, i177* %A, align 4 + %B5 = udiv i177 %L, -1 + %B4 = add i177 %B5, -1 + %B = and i177 %B4, %L + %B2 = add i177 %B, -1 + %G11 = getelementptr i177, i177* %A, i177 %B2 + %B6 = mul i177 %B5, %B2 + %B24 = ashr i177 %L, %B6 + %C17 = icmp sgt i177 %B, %B24 + %G62 = getelementptr i177, i177* %G11, i1 %C17 + %B28 = urem i177 %B24, %B6 + store i177 %B28, i177* %G62, align 4 + ret void +} From ff86acbb79c49128a93f074e44d4c75b68a2b88b Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 5 Oct 2020 13:46:15 +0100 Subject: [PATCH 022/321] [LV] Regenerate test. NFC This just reruns the update script to add the new [[LOOP0:!llvm.loop !.*]] checks to remove them from other diffs. 
--- .../Transforms/LoopVectorize/X86/pr35432.ll | 4 +-- .../LoopVectorize/X86/tail_loop_folding.ll | 12 ++++---- .../LoopVectorize/if-pred-stores.ll | 28 +++++++++---------- .../pr46525-expander-insertpoint.ll | 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index 1a1f21f89a348..e71c40419f9f1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -87,7 +87,7 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP31:%.*]] = zext i8 [[TMP29]] to i32 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]] ; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) @@ -104,7 +104,7 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[DEC]] = add i8 [[C_04]], -1 ; CHECK-NEXT: [[CONV5:%.*]] = zext i8 [[DEC]] to i32 ; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]] -; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], [[LOOP2:!llvm.loop !.*]] ; CHECK: for.cond4.for.inc9_crit_edge: ; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll index f405b8734615d..044128c8ee74d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -33,7 +33,7 @@ define dso_local void @tail_folding_enabled(i32* noalias nocapture %A, i32* noal ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP8]], <8 x i32>* [[TMP11]], i32 4, <8 x i1> [[TMP2]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -52,7 +52,7 @@ define dso_local void @tail_folding_enabled(i32* noalias nocapture %A, i32* noal ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 430 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; entry: br label %for.body @@ -104,7 +104,7 @@ define dso_local void @tail_folding_disabled(i32* noalias nocapture %A, i32* noa ; CHECK-NEXT: call void 
@llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP8]], <8 x i32>* [[TMP11]], i32 4, <8 x i1> [[TMP2]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -123,7 +123,7 @@ define dso_local void @tail_folding_disabled(i32* noalias nocapture %A, i32* noa ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 430 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !5 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; entry: br label %for.body @@ -193,7 +193,7 @@ define i32 @reduction_i32(i32* nocapture readonly %A, i32* nocapture readonly %B ; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP13]], <8 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP15]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -213,7 +213,7 @@ define i32 @reduction_i32(i32* nocapture readonly %A, i32* nocapture readonly %B ; CHECK-NEXT: [[SUM_1]] = add nuw nsw i32 [[ADD]], [[SUM_0]] ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !7 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_1_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index c1356fedce124..9a07c1c94b37d 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -34,7 +34,7 @@ define i32 @test(i32* nocapture %f) #0 { ; UNROLL: pred.store.continue3: ; UNROLL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 -; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, 128 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] @@ -51,7 +51,7 @@ define i32 @test(i32* nocapture %f) #0 { ; UNROLL: for.inc: ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; 
UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; UNROLL: for.end: ; UNROLL-NEXT: ret i32 0 ; @@ -84,7 +84,7 @@ define i32 @test(i32* nocapture %f) #0 { ; UNROLL-NOSIMPLIFY: pred.store.continue3: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, 128 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -104,7 +104,7 @@ define i32 @test(i32* nocapture %f) #0 { ; UNROLL-NOSIMPLIFY: for.inc: ; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret i32 0 ; @@ -139,7 +139,7 @@ define i32 @test(i32* nocapture %f) #0 { ; VEC: pred.store.continue2: ; VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 -; VEC-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; VEC-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; VEC: middle.block: ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, 128 ; VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] @@ -156,7 +156,7 @@ define i32 @test(i32* nocapture %f) #0 { ; VEC: for.inc: ; VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; VEC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; VEC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; VEC: for.end: ; VEC-NEXT: ret i32 0 ; @@ -255,7 +255,7 @@ define void @bug18724(i1 %cond) { ; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION8:%.*]] = add i32 [[TMP6]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3 +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]] ; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0 @@ -279,7 +279,7 @@ define void @bug18724(i1 %cond) { ; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32 ; UNROLL-NOSIMPLIFY-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], !llvm.loop !4 +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], [[LOOP4:!llvm.loop 
!.*]] ; UNROLL-NOSIMPLIFY: for.inc26.loopexit: ; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_2_LCSSA:%.*]] = phi i32 [ [[INEWCHUNKS_2]], [[FOR_INC23]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC26]] @@ -371,7 +371,7 @@ define void @minimal_bit_widths(i1 %c) { ; UNROLL: pred.store.continue6: ; UNROLL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3 +; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] @@ -390,7 +390,7 @@ define void @minimal_bit_widths(i1 %c) { ; UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !4 +; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -426,7 +426,7 @@ define void @minimal_bit_widths(i1 %c) { ; UNROLL-NOSIMPLIFY: pred.store.continue6: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -449,7 +449,7 @@ define void @minimal_bit_widths(i1 %c) { ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !6 +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret void ; @@ -489,7 +489,7 @@ define void @minimal_bit_widths(i1 %c) { ; VEC: pred.store.continue3: ; VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; VEC: middle.block: ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] @@ -508,7 +508,7 @@ define void @minimal_bit_widths(i1 %c) { ; VEC-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; VEC-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; VEC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !5 +; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; VEC: for.end: ; VEC-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll index 
f7acb2ff9ad16..0d61a87736136 100644 --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -69,7 +69,7 @@ define void @test(i16 %x, i64 %y, i32* %ptr) { ; CHECK-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; entry: %conv19 = sext i16 %x to i64 From 1425c72236766ad9107d86cb645ee8c6a3ee0eb1 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 5 Oct 2020 15:10:53 +0200 Subject: [PATCH 023/321] [clangd] Add isKeyword function. This will be used in rename for doing basic name validation. Differential Revision: https://reviews.llvm.org/D88810 --- clang-tools-extra/clangd/SourceCode.cpp | 6 ++++++ clang-tools-extra/clangd/SourceCode.h | 4 ++++ .../clangd/unittests/SourceCodeTests.cpp | 13 +++++++++++++ 3 files changed, 23 insertions(+) diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 0432097b43488..c6279177eba99 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -633,6 +633,12 @@ std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier, return Ranges; } +bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts) { + // Keywords are initialized in constructor. + clang::IdentifierTable KeywordsTable(LangOpts); + return KeywordsTable.find(NewName) != KeywordsTable.end(); +} + namespace { struct NamespaceEvent { enum { diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h index 128f985a52664..be78e2f86436c 100644 --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -248,6 +248,10 @@ struct SpelledWord { const LangOptions &LangOpts); }; +/// Return true if \p TokenName is in the list of reserved keywords of the +/// language. +bool isKeyword(llvm::StringRef TokenName, const LangOptions &LangOpts); + /// Heuristically determine namespaces visible at a point, without parsing Code. /// This considers using-directives and enclosing namespace-declarations that /// are visible (and not obfuscated) in the file itself (not headers). diff --git a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp index 9c3ae4df51ff1..c05515f2c094f 100644 --- a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp +++ b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp @@ -789,6 +789,19 @@ TEST(SourceCodeTests, isHeaderFile) { EXPECT_TRUE(isHeaderFile("header.h", LangOpts)); } +TEST(SourceCodeTests, isKeywords) { + LangOptions LangOpts; + LangOpts.CPlusPlus20 = true; + EXPECT_TRUE(isKeyword("int", LangOpts)); + EXPECT_TRUE(isKeyword("return", LangOpts)); + EXPECT_TRUE(isKeyword("co_await", LangOpts)); + + // These are identifiers (not keywords!) with special meaning in some + // contexts. 
+ EXPECT_FALSE(isKeyword("final", LangOpts)); + EXPECT_FALSE(isKeyword("override", LangOpts)); +} + } // namespace } // namespace clangd } // namespace clang From 7a932f4f4ccbc0c4294c6911d404f74529f3259b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 5 Oct 2020 14:23:13 +0100 Subject: [PATCH 024/321] [Parser] ParseMicrosoftAsmStatement - Replace bit '|' operator with logical '||' operator. (PR47071) Fixes static analysis warning. --- clang/lib/Parse/ParseStmtAsm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp index 7d0818840a4fc..bdf40c291cb61 100644 --- a/clang/lib/Parse/ParseStmtAsm.cpp +++ b/clang/lib/Parse/ParseStmtAsm.cpp @@ -581,7 +581,7 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { std::unique_ptr STI( TheTarget->createMCSubtargetInfo(TT, TO.CPU, FeaturesStr)); // Target MCTargetDesc may not be linked in clang-based tools. - if (!MAI || !MII | !MOFI || !STI) { + if (!MAI || !MII || !MOFI || !STI) { Diag(AsmLoc, diag::err_msasm_unable_to_create_target) << "target MC unavailable"; return EmptyStmt(); From 348d85a6c7950a5f14ee6c8741380b5876d99afd Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 3 Oct 2020 17:40:42 +0100 Subject: [PATCH 025/321] [VPlan] Clean up uses/operands on VPBB deletion. Update the code responsible for deleting VPBBs and recipes to properly update users and release operands. This is another preparation for D84680 & following patches towards enabling modeling def-use chains in VPlan. --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 26 ++++++++++++++++++- llvm/lib/Transforms/Vectorize/VPlan.h | 13 +++++++--- .../Transforms/Vectorize/VPlanTransforms.cpp | 3 +++ llvm/lib/Transforms/Vectorize/VPlanValue.h | 4 +++ .../Transforms/Vectorize/VPlanSlpTest.cpp | 23 ++++++++++++++++ .../Transforms/Vectorize/VPlanTest.cpp | 18 +++++++++++++ 6 files changed, 82 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index cb5a43272e54b..97292f138448e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -181,8 +181,15 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { void VPBlockBase::deleteCFG(VPBlockBase *Entry) { SmallVector Blocks; - for (VPBlockBase *Block : depth_first(Entry)) + + VPValue DummyValue; + for (VPBlockBase *Block : depth_first(Entry)) { + // Drop all references in VPBasicBlocks and replace all uses with + // DummyValue. 
+ if (auto *VPBB = dyn_cast<VPBasicBlock>(Block)) + VPBB->dropAllReferences(&DummyValue); Blocks.push_back(Block); + } for (VPBlockBase *Block : Blocks) delete Block; @@ -305,6 +312,17 @@ void VPBasicBlock::execute(VPTransformState *State) { LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); } +void VPBasicBlock::dropAllReferences(VPValue *NewValue) { + for (VPRecipeBase &R : Recipes) { + if (auto *VPV = R.toVPValue()) + VPV->replaceAllUsesWith(NewValue); + + if (auto *User = R.toVPUser()) + for (unsigned I = 0, E = User->getNumOperands(); I != E; I++) + User->setOperand(I, NewValue); + } +} + void VPRegionBlock::execute(VPTransformState *State) { ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry); @@ -376,6 +394,12 @@ void VPRecipeBase::removeFromParent() { Parent = nullptr; } +VPValue *VPRecipeBase::toVPValue() { + if (auto *V = dyn_cast<VPInstruction>(this)) + return V; + return nullptr; +} + iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { assert(getParent() && "Recipe not in any VPBasicBlock"); return getParent()->getRecipeList().erase(getIterator()); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fae73fdc57820..2a51162fa7c8f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -680,6 +680,10 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> { /// Returns a pointer to a VPUser, if the recipe inherits from VPUser or /// nullptr otherwise. VPUser *toVPUser(); + + /// Returns a pointer to a VPValue, if the recipe inherits from VPValue or + /// nullptr otherwise. + VPValue *toVPValue(); }; inline bool VPUser::classof(const VPRecipeBase *Recipe) { @@ -1362,6 +1366,10 @@ class VPBasicBlock : public VPBlockBase { /// this VPBasicBlock, thereby "executing" the VPlan. void execute(struct VPTransformState *State) override; + /// Replace all operands of VPUsers in the block with \p NewValue and also + /// replace all uses of VPValues defined in the block with \p NewValue. + void dropAllReferences(VPValue *NewValue); + private: /// Create an IR BasicBlock to hold the output instructions generated by this /// VPBasicBlock, and return it. Update the CFGState accordingly. @@ -2006,10 +2014,7 @@ class VPlanSlp { public: VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {} - ~VPlanSlp() { - for (auto &KV : BundleToCombined) - delete KV.second; - } + ~VPlanSlp() = default; /// Tries to build an SLP tree rooted at \p Operands and returns a /// VPInstruction combining \p Operands, if they can be combined. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 3a4872a721221..45aeb201c28ec 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -35,6 +35,7 @@ void VPlanTransforms::VPInstructionsToVPRecipes( Plan->addCBV(NCondBit); } } + VPValue DummyValue; for (VPBlockBase *Base : RPOT) { // Do not widen instructions in pre-header and exit blocks. 
if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0) @@ -48,6 +49,7 @@ void VPlanTransforms::VPInstructionsToVPRecipes( VPInstruction *VPInst = cast<VPInstruction>(Ingredient); Instruction *Inst = cast<Instruction>(VPInst->getUnderlyingValue()); if (DeadInstructions.count(Inst)) { + VPInst->replaceAllUsesWith(&DummyValue); Ingredient->eraseFromParent(); continue; } @@ -77,6 +79,7 @@ void VPlanTransforms::VPInstructionsToVPRecipes( new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands())); NewRecipe->insertBefore(Ingredient); + VPInst->replaceAllUsesWith(&DummyValue); Ingredient->eraseFromParent(); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 0882837170f20..e51c19601f886 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -168,6 +168,10 @@ class VPUser { VPUser(const VPUser &) = delete; VPUser &operator=(const VPUser &) = delete; + virtual ~VPUser() { + for (VPValue *Op : operands()) + Op->removeUser(*this); + } void addOperand(VPValue *Operand) { Operands.push_back(Operand); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp index 6ebef45e3cdfa..2d868d3cd8afc 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp @@ -116,6 +116,11 @@ TEST_F(VPlanSlpTest, testSlpSimple_2) { auto *CombinedLoadB = cast<VPInstruction>(CombinedAdd->getOperand(1)); EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode()); EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode()); + + delete CombinedStore; + delete CombinedAdd; + delete CombinedLoadA; + delete CombinedLoadB; } TEST_F(VPlanSlpTest, testSlpSimple_3) { @@ -190,6 +195,11 @@ TEST_F(VPlanSlpTest, testSlpSimple_3) { VPInstruction *GetB = cast<VPInstruction>(&*std::next(Body->begin(), 3)); EXPECT_EQ(GetA, CombinedLoadA->getOperand(0)); EXPECT_EQ(GetB, CombinedLoadB->getOperand(0)); + + delete CombinedStore; + delete CombinedAdd; + delete CombinedLoadA; + delete CombinedLoadB; } TEST_F(VPlanSlpTest, testSlpReuse_1) { @@ -249,6 +259,10 @@ TEST_F(VPlanSlpTest, testSlpReuse_1) { auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0)); EXPECT_EQ(CombinedLoadA, CombinedAdd->getOperand(1)); EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode()); + + delete CombinedStore; + delete CombinedAdd; + delete CombinedLoadA; } TEST_F(VPlanSlpTest, testSlpReuse_2) { @@ -355,6 +369,15 @@ static void checkReorderExample(VPInstruction *Store1, VPInstruction *Store2, VPInstruction *LoadvD1 = cast<VPInstruction>(&*std::next(Body->begin(), 19)); EXPECT_EQ(LoadvD0->getOperand(0), CombinedLoadD->getOperand(0)); EXPECT_EQ(LoadvD1->getOperand(0), CombinedLoadD->getOperand(1)); + + delete CombinedStore; + delete CombinedAdd; + delete CombinedMulAB; + delete CombinedMulCD; + delete CombinedLoadA; + delete CombinedLoadB; + delete CombinedLoadC; + delete CombinedLoadD; } TEST_F(VPlanSlpTest, testSlpReorder_1) { diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 39727ca01d84a..e325439f72be3 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -179,6 +179,24 @@ TEST(VPInstructionTest, replaceAllUsesWith) { delete VPV3; } +TEST(VPInstructionTest, releaseOperandsAtDeletion) { + VPValue *VPV1 = new VPValue(); + VPValue *VPV2 = new VPValue(); + VPInstruction *I1 = new VPInstruction(0, {VPV1, VPV2}); + + EXPECT_EQ(1u, 
VPV1->getNumUsers()); + EXPECT_EQ(I1, *VPV1->user_begin()); + EXPECT_EQ(1u, VPV2->getNumUsers()); + EXPECT_EQ(I1, *VPV2->user_begin()); + + delete I1; + + EXPECT_EQ(0u, VPV1->getNumUsers()); + EXPECT_EQ(0u, VPV2->getNumUsers()); + + delete VPV1; + delete VPV2; +} TEST(VPBasicBlockTest, getPlan) { { VPBasicBlock *VPBB1 = new VPBasicBlock(); From 346b9d17720a0ccd920cd02b81811a4d2ddc67d6 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 5 Oct 2020 14:36:19 +0000 Subject: [PATCH 026/321] [mlir][Linalg] Canonicalize TensorCastOp away when it feeds a LinalgOp. This canonicalization is the counterpart of MemRefCastOp -> LinalgOp but on tensors. This is needed to properly canonicalize post linalg tiling on tensors. Differential Revision: https://reviews.llvm.org/D88729 --- .../Linalg/IR/LinalgStructuredOpsInterface.td | 36 ++++++++++++- .../include/mlir/Dialect/StandardOps/IR/Ops.h | 25 +++++++++ .../mlir/Dialect/StandardOps/IR/Ops.td | 2 +- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 54 +++++++++++++++++++ mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 54 +++++++++++++++++++ mlir/test/Dialect/Linalg/canonicalize.mlir | 20 +++++++ 6 files changed, 189 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index f51f7b913027f..44c6b77ee4046 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -404,6 +404,19 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { return getInitTensors()[i]; }] >, + InterfaceMethod< + /*desc=*/[{ + Return the number of inputs, output buffers and init tensors operands. + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumShapedOperands", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto range = this->getOperation()->getOperands(); + return getNumInputsAndOutputBuffers() + $_op.getNumInitTensors(); + }] + >, InterfaceMethod< /*desc=*/[{ Return the range over inputs, output buffers and init tensors. @@ -414,7 +427,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { /*methodBody=*/"", /*defaultImplementation=*/[{ auto range = this->getOperation()->getOperands(); - return {range.begin(), range.begin() + getNumInputsAndOutputs()}; + return {range.begin(), range.begin() + getNumShapedOperands()}; }] >, InterfaceMethod< @@ -621,6 +634,27 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { }] > ]; + + let extraClassDeclaration = [{ + /// Returns all the operands past the inputs, output_buffers and + /// init_tensors operands. Asserts that these operands are value types to + /// allow transformations like tiling to just use the values when cloning + /// `linalgOp`. 
+ SmallVector<Value, 4> getAssumedNonShapedOperands() { + unsigned numShapedOperands = getNumInputsAndOutputs(); + unsigned nExtraOperands = + getOperation()->getNumOperands() - numShapedOperands; + SmallVector<Value, 4> res; + res.reserve(nExtraOperands); + for (unsigned i = 0; i < nExtraOperands; ++i) { + res.push_back(getOperation()->getOperand(numShapedOperands + i)); + assert((res.back().getType().isSignlessIntOrIndexOrFloat() + || res.back().getType().isa<VectorType>()) && + "expected scalar or vector type"); + } + return res; + } + }]; } #endif // LINALG_IR_STRUCTURED_OPS_INTERFACE diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h index fbe735e31cff9..409f54384aca5 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h @@ -350,6 +350,31 @@ ParseResult parseDimAndSymbolList(OpAsmParser &parser, /// ``` bool canFoldIntoConsumerOp(MemRefCastOp castOp); +/// Counterpart of `canFoldIntoConsumerOp(MemRefCastOp castOp)` for tensors. +/// Determines whether TensorCastOp casts to a more dynamic version of the +/// source tensor. This is useful to fold a tensor_cast into a consuming op and +/// implement canonicalization patterns for ops in different dialects that may +/// consume the results of tensor_cast operations. Such foldable tensor_cast +/// operations are typically inserted as `subtensor` ops and are canonicalized, +/// to preserve the type compatibility of their uses. +/// +/// Returns true when all conditions are met: +/// 1. source and result are ranked tensors with same element type and rank. +/// 2. the tensor type has more static information than the result +/// +/// Example: +/// ```mlir +/// %1 = tensor_cast %0 : tensor<8x16xf32> to tensor<?x?xf32> +/// %2 = consumer %1 ... : tensor<?x?xf32> ... +/// ``` +/// +/// folds into: +/// +/// ```mlir +/// %2 = consumer %0 ... : tensor<8x16xf32> ... +/// ``` +bool canFoldIntoConsumerOp(TensorCastOp castOp); + /// Compute `lhs` `pred` `rhs`, where `pred` is one of the known integer /// comparison predicates. 
bool applyCmpPredicate(CmpIPredicate predicate, const APInt &lhs, diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 69c979ae9e387..ab7b599dffba7 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -3334,7 +3334,7 @@ def TensorCastOp : CastOp<"tensor_cast"> { ``` }]; - let arguments = (ins AnyTensor); + let arguments = (ins AnyTensor:$source); let results = (outs AnyTensor); let extraClassDeclaration = [{ diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index e9cdb3391f4a2..26aa75955e3c1 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -25,6 +25,7 @@ #include "mlir/IR/StandardTypes.h" #include "mlir/Support/LLVM.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" @@ -1498,12 +1499,65 @@ struct EraseDeadLinalgOp : public RewritePattern { return failure(); } }; + +struct FoldTensorCastOp : public RewritePattern { + FoldTensorCastOp(PatternBenefit benefit = 1) + : RewritePattern(benefit, MatchAnyOpTypeTag()) {} + + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override { + auto linalgOp = dyn_cast<LinalgOp>(op); + if (!linalgOp) + return failure(); + + // If no operand comes from a TensorCastOp and can be folded then fail. + bool hasTensorCastOperand = + llvm::any_of(linalgOp.getShapedOperands(), [&](Value v) { + if (v.isa<BlockArgument>()) + return false; + auto castOp = v.getDefiningOp<TensorCastOp>(); + return castOp && canFoldIntoConsumerOp(castOp); + }); + if (!hasTensorCastOperand) + return failure(); + + SmallVector<Type, 4> newResultTypes; + newResultTypes.reserve(op->getNumResults()); + SmallVector<Value, 4> newOperands; + newOperands.reserve(op->getNumOperands()); + // Inputs may fold. + for (Value v : linalgOp.getInputs()) { + auto tensorCastOp = v.getDefiningOp<TensorCastOp>(); + newOperands.push_back( + canFoldIntoConsumerOp(tensorCastOp) ? tensorCastOp.source() : v); + } + // Output buffers are memrefs, they don't fold. + newOperands.append(linalgOp.getOutputBuffers().begin(), + linalgOp.getOutputBuffers().end()); + // Init tensors may fold, in which case the resultType must also change. + for (Value v : linalgOp.getInitTensors()) { + auto tensorCastOp = v.getDefiningOp<TensorCastOp>(); + bool fold = canFoldIntoConsumerOp(tensorCastOp); + newOperands.push_back(fold ? tensorCastOp.getOperand() : v); + newResultTypes.push_back(newOperands.back().getType()); + } + auto extraOperands = linalgOp.getAssumedNonShapedOperands(); + newOperands.append(extraOperands.begin(), extraOperands.end()); + // Clone op. + Operation *newOp = + linalgOp.clone(rewriter, op->getLoc(), newResultTypes, newOperands); + rewriter.replaceOp(op, newOp->getResults()); + + return success(); + } +}; } // namespace #define CANONICALIZERS_AND_FOLDERS(XXX) \ void XXX::getCanonicalizationPatterns(OwningRewritePatternList &results, \ MLIRContext *context) { \ results.insert<EraseDeadLinalgOp>(); \ + results.insert<FoldTensorCastOp>(); \ } \ \ LogicalResult XXX::fold(ArrayRef<Attribute>, \ diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index a4d739135aea3..f2823c564ccef 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -3157,6 +3157,60 @@ bool mlir::canFoldIntoConsumerOp(MemRefCastOp castOp) { return true; } +/// Counterpart of `canFoldIntoConsumerOp(MemRefCastOp castOp)` for tensors. 
+/// Determines whether TensorCastOp casts to a more dynamic version of the +/// source tensor. This is useful to fold a tensor_cast into a consuming op and +/// implement canonicalization patterns for ops in different dialects that may +/// consume the results of tensor_cast operations. Such foldable tensor_cast +/// operations are typically inserted as `subtensor` ops and are canonicalized, +/// to preserve the type compatibility of their uses. +/// +/// Returns true when all conditions are met: +/// 1. source and result are ranked tensors with same element type and rank. +/// 2. the tensor type has more static information than the result +/// +/// Example: +/// ```mlir +/// %1 = tensor_cast %0 : tensor<8x16xf32> to tensor<?x?xf32> +/// %2 = consumer %1 ... : tensor<?x?xf32> ... +/// ``` +/// +/// folds into: +/// +/// ```mlir +/// %2 = consumer %0 ... : tensor<8x16xf32> ... +/// ``` +bool mlir::canFoldIntoConsumerOp(TensorCastOp castOp) { + if (!castOp) + return false; + + RankedTensorType sourceType = + castOp.source().getType().dyn_cast<RankedTensorType>(); + RankedTensorType resultType = castOp.getType().dyn_cast<RankedTensorType>(); + + // Requires RankedTensorType. + if (!sourceType || !resultType) + return false; + + // Requires same elemental type. + if (sourceType.getElementType() != resultType.getElementType()) + return false; + + // Requires same rank. + if (sourceType.getRank() != resultType.getRank()) + return false; + + // If cast is towards more static sizes along any dimension, don't fold. + for (auto it : llvm::zip(sourceType.getShape(), resultType.getShape())) { + auto ss = std::get<0>(it), st = std::get<1>(it); + if (ss != st) + if (ShapedType::isDynamic(ss) && !ShapedType::isDynamic(st)) + return false; + } + + return true; +} + namespace { /// Pattern to rewrite a subview op with MemRefCast arguments. /// This essentially pushes memref_cast past its consuming subview when diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 5e0890fa4bb59..cf86a97f4fcdb 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -259,3 +259,23 @@ func @reshape_splat_constant_float64() -> tensor<2x4x2xf64> // CHECK: %[[CST:.*]] = constant dense<{{.*}}> : tensor<2x4x2xf64> // CHECK-NOT: linalg.tensor_reshape // CHECK: return %[[CST]] + +// ----- + +// CHECK-LABEL: func @tensor_cast( +func @tensor_cast(%a : tensor<3x4xf32>, %b : tensor<4x?xf32>, %c : tensor<3x?xf32>) + -> tensor<3x?xf32> +{ + %ta = tensor_cast %a : tensor<3x4xf32> to tensor<?x?xf32> + %tb = tensor_cast %b : tensor<4x?xf32> to tensor<?x?xf32> + %tc = tensor_cast %c : tensor<3x?xf32> to tensor<?x?xf32> + + // CHECK: linalg.matmul ins({{.*}}tensor<3x4xf32>, tensor<4x?xf32>) + // CHECK-SAME: init({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32> + %0 = linalg.matmul ins(%ta, %tb: tensor<?x?xf32>, tensor<?x?xf32>) + init(%tc: tensor<?x?xf32>) -> tensor<?x?xf32> + + %1 = tensor_cast %0 : tensor<?x?xf32> to tensor<3x?xf32> + + return %1: tensor<3x?xf32> +} From 2573cf3c3d42c943cb91b6e85b803f7671260185 Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Wed, 23 Sep 2020 11:43:27 +0100 Subject: [PATCH 027/321] [ARM] Fold select_cc(vecreduce_[u|s][min|max], x) into VMINV or VMAXV This folds a select_cc or select(setcc) of a max or min vector reduction with a scalar value into a VMAXV or VMINV. 
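For example, a minimal sketch distilled from the new test file (the function name here is invented; the full set of patterns, including the commuted and inverted forms, is in the test below):

  define i32 @umin_select_sketch(<4 x i32> %vec, i32 %min) {
    %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec)
    %cmp = icmp ult i32 %x, %min
    %sel = select i1 %cmp, i32 %x, i32 %min
    ret i32 %sel
  }

With this combine, the whole sequence lowers to a single vminv.u32 r0, q0 rather than a vector reduction followed by a scalar compare and select.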
Differential Revision: https://reviews.llvm.org/D87836 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 113 +++ llvm/lib/Target/ARM/ARMISelLowering.h | 4 + llvm/lib/Target/ARM/ARMInstrMVE.td | 34 + .../CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll | 647 ++++++++++++++++++ 4 files changed, 798 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 798ecf2487637..a818b66dd96fa 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -987,6 +987,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SMAX); setTargetDAGCombine(ISD::UMAX); setTargetDAGCombine(ISD::FP_EXTEND); + setTargetDAGCombine(ISD::SELECT); + setTargetDAGCombine(ISD::SELECT_CC); } if (!Subtarget->hasFP64()) { @@ -1740,6 +1742,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu"; case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps"; case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu"; + case ARMISD::VMINVu: return "ARMISD::VMINVu"; + case ARMISD::VMINVs: return "ARMISD::VMINVs"; + case ARMISD::VMAXVu: return "ARMISD::VMAXVu"; + case ARMISD::VMAXVs: return "ARMISD::VMAXVs"; case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; @@ -12093,6 +12099,111 @@ static bool PerformAddeSubeCombine(SDNode *N, return SDValue(); } +static SDValue PerformSELECTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasMVEIntegerOps()) + return SDValue(); + + SDLoc dl(N); + SDValue SetCC; + SDValue LHS; + SDValue RHS; + ISD::CondCode CC; + SDValue TrueVal; + SDValue FalseVal; + + if (N->getOpcode() == ISD::SELECT && + N->getOperand(0)->getOpcode() == ISD::SETCC) { + SetCC = N->getOperand(0); + LHS = SetCC->getOperand(0); + RHS = SetCC->getOperand(1); + CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); + TrueVal = N->getOperand(1); + FalseVal = N->getOperand(2); + } else if (N->getOpcode() == ISD::SELECT_CC) { + LHS = N->getOperand(0); + RHS = N->getOperand(1); + CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); + TrueVal = N->getOperand(2); + FalseVal = N->getOperand(3); + } else { + return SDValue(); + } + + unsigned int Opcode = 0; + if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN || + FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) && + (CC == ISD::SETULT || CC == ISD::SETUGT)) { + Opcode = ARMISD::VMINVu; + if (CC == ISD::SETUGT) + std::swap(TrueVal, FalseVal); + } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN || + FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) && + (CC == ISD::SETLT || CC == ISD::SETGT)) { + Opcode = ARMISD::VMINVs; + if (CC == ISD::SETGT) + std::swap(TrueVal, FalseVal); + } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX || + FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) && + (CC == ISD::SETUGT || CC == ISD::SETULT)) { + Opcode = ARMISD::VMAXVu; + if (CC == ISD::SETULT) + std::swap(TrueVal, FalseVal); + } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX || + FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) && + (CC == ISD::SETGT || CC == ISD::SETLT)) { + Opcode = ARMISD::VMAXVs; + if (CC == ISD::SETLT) + std::swap(TrueVal, FalseVal); + } else + return SDValue(); + + // Normalise to the right hand side being the vector reduction + switch (TrueVal->getOpcode()) { + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_SMIN: + case 
ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_SMAX: + std::swap(LHS, RHS); + std::swap(TrueVal, FalseVal); + break; + } + + EVT VectorType = FalseVal->getOperand(0).getValueType(); + + if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 && + VectorType != MVT::v4i32) + return SDValue(); + + EVT VectorScalarType = VectorType.getVectorElementType(); + + // The values being selected must also be the ones being compared + if (TrueVal != LHS || FalseVal != RHS) + return SDValue(); + + EVT LeftType = LHS->getValueType(0); + EVT RightType = RHS->getValueType(0); + + // The types must match the reduced type too + if (LeftType != VectorScalarType || RightType != VectorScalarType) + return SDValue(); + + // Legalise the scalar to an i32 + if (VectorScalarType != MVT::i32) + LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); + + // Generate the reduction as an i32 for legalisation purposes + auto Reduction = + DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0)); + + // The result isn't actually an i32 so truncate it back to its original type + if (VectorScalarType != MVT::i32) + Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction); + + return Reduction; +} + static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -16049,6 +16160,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::SELECT_CC: + case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget); case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget); case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget); case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index f5bb097062aff..90cbf1eea0481 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -241,6 +241,10 @@ class VectorType; VMLALVAu, // provided as low and high halves VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask VMLALVApu, + VMINVu, // Find minimum unsigned value of a vector and register + VMINVs, // Find minimum signed value of a vector and register + VMAXVu, // Find maximum unsigned value of a vector and register + VMAXVs, // Find maximum signed value of a vector and register SMULWB, // Signed multiply word by half word, bottom SMULWT, // Signed multiply word by half word, top diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index d9e9cf1176fc7..f7f403503dc7f 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -944,6 +944,14 @@ multiclass MVE_VMINMAXV_ty { defm u32: MVE_VMINMAXV_p; } +def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer + SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2> +]>; +def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>; +def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>; +def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>; +def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>; + defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">; defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">; @@ -974,6 +982,32 @@ let Predicates = [HasMVEInt] in { def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))), (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>; + def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMINVu8 $x, 
$src))>; + def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMINVu16 $x, $src))>; + def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMINVu32 $x, $src))>; + def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMINVs8 $x, $src))>; + def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMINVs16 $x, $src))>; + def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMINVs32 $x, $src))>; + + def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVu8 $x, $src))>; + def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVu16 $x, $src))>; + def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVu32 $x, $src))>; + def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVs8 $x, $src))>; + def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVs16 $x, $src))>; + def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVs32 $x, $src))>; + } multiclass MVE_VMINMAXAV_ty { diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll new file mode 100644 index 0000000000000..13b831efabc57 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll @@ -0,0 +1,647 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s + +define arm_aapcs_vfpcc zeroext i8 @uminv16i8(<16 x i8> %vec, i8 zeroext %min) { +; CHECK-LABEL: uminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) + %cmp = icmp ult i8 %x, %min + %1 = select i1 %cmp, i8 %x, i8 %min + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @uminv8i16(<8 x i16> %vec, i16 zeroext %min) { +; CHECK-LABEL: uminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) + %cmp = icmp ult i16 %x, %min + %1 = select i1 %cmp, i16 %x, i16 %min + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @uminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: uminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) + %cmp = icmp ult i32 %x, %min + %1 = select i1 %cmp, i32 %x, i32 %min + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @sminv16i8(<16 x i8> %vec, i8 signext %min) { +; CHECK-LABEL: sminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) + %cmp = icmp slt i8 %x, %min + %1 = select i1 %cmp, i8 %x, i8 %min + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @sminv8i16(<8 x i16> %vec, i16 signext %min) { +; CHECK-LABEL: sminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) + %cmp = icmp slt i16 %x, %min + %1 = select i1 %cmp, i16 %x, i16 %min + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @sminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: sminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 
@llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) + %cmp = icmp slt i32 %x, %min + %1 = select i1 %cmp, i32 %x, i32 %min + ret i32 %1 +} + +define arm_aapcs_vfpcc zeroext i8 @umaxv16i8(<16 x i8> %vec, i8 zeroext %max) { +; CHECK-LABEL: umaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) + %cmp = icmp ugt i8 %x, %max + %1 = select i1 %cmp, i8 %x, i8 %max + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @umaxv8i16(<8 x i16> %vec, i16 zeroext %max) { +; CHECK-LABEL: umaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %cmp = icmp ugt i16 %x, %max + %1 = select i1 %cmp, i16 %x, i16 %max + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @umaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: umaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) + %cmp = icmp ugt i32 %x, %max + %1 = select i1 %cmp, i32 %x, i32 %max + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @smaxv16i8(<16 x i8> %vec, i8 signext %max) { +; CHECK-LABEL: smaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %cmp = icmp sgt i8 %x, %max + %1 = select i1 %cmp, i8 %x, i8 %max + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @smaxv8i16(<8 x i16> %vec, i16 signext %max) { +; CHECK-LABEL: smaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %cmp = icmp sgt i16 %x, %max + %1 = select i1 %cmp, i16 %x, i16 %max + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @smaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: smaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) + %cmp = icmp sgt i32 %x, %max + %1 = select i1 %cmp, i32 %x, i32 %max + ret i32 %1 +} + +define arm_aapcs_vfpcc zeroext i8 @commute_uminv16i8(<16 x i8> %vec, i8 zeroext %min) { +; CHECK-LABEL: commute_uminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) + %cmp = icmp ult i8 %min, %x + %1 = select i1 %cmp, i8 %min, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @commute_uminv8i16(<8 x i16> %vec, i16 zeroext %min) { +; CHECK-LABEL: commute_uminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) + %cmp = icmp ult i16 %min, %x + %1 = select i1 %cmp, i16 %min, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @commute_uminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: commute_uminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) + %cmp = icmp ult i32 %min, %x + %1 = select i1 %cmp, i32 %min, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @commute_sminv16i8(<16 x i8> %vec, i8 signext %min) { +; CHECK-LABEL: commute_sminv16i8: +; CHECK: @ %bb.0: 
+; CHECK-NEXT: vminv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) + %cmp = icmp slt i8 %min, %x + %1 = select i1 %cmp, i8 %min, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @commute_sminv8i16(<8 x i16> %vec, i16 signext %min) { +; CHECK-LABEL: commute_sminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) + %cmp = icmp slt i16 %min, %x + %1 = select i1 %cmp, i16 %min, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @commute_sminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: commute_sminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) + %cmp = icmp slt i32 %min, %x + %1 = select i1 %cmp, i32 %min, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc zeroext i8 @commute_umaxv16i8(<16 x i8> %vec, i8 zeroext %max) { +; CHECK-LABEL: commute_umaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) + %cmp = icmp ugt i8 %max, %x + %1 = select i1 %cmp, i8 %max, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @commute_umaxv8i16(<8 x i16> %vec, i16 zeroext %max) { +; CHECK-LABEL: commute_umaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %cmp = icmp ugt i16 %max, %x + %1 = select i1 %cmp, i16 %max, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @commute_umaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: commute_umaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) + %cmp = icmp ugt i32 %max, %x + %1 = select i1 %cmp, i32 %max, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @commute_smaxv16i8(<16 x i8> %vec, i8 signext %max) { +; CHECK-LABEL: commute_smaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %cmp = icmp sgt i8 %max, %x + %1 = select i1 %cmp, i8 %max, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @commute_smaxv8i16(<8 x i16> %vec, i16 signext %max) { +; CHECK-LABEL: commute_smaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %cmp = icmp sgt i16 %max, %x + %1 = select i1 %cmp, i16 %max, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @commute_smaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: commute_smaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) + %cmp = icmp sgt i32 %max, %x + %1 = select i1 %cmp, i32 %max, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @mismatch_smaxv16i8(<16 x i8> %vec, i8 signext %max) { +; CHECK-LABEL: mismatch_smaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: mvn r1, #127 +; CHECK-NEXT: vmaxv.s8 r1, q0 +; CHECK-NEXT: sxtb r2, r1 +; CHECK-NEXT: cmp r2, r0 +; CHECK-NEXT: csel r0, r0, r1, gt +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr 
+ %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %cmp = icmp sgt i8 %x, %max + %1 = select i1 %cmp, i8 %max, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i8 @mismatch2_smaxv16i8(<16 x i8> %vec, i8 signext %max) { +; CHECK-LABEL: mismatch2_smaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: mvn r1, #127 +; CHECK-NEXT: vmaxv.s8 r1, q0 +; CHECK-NEXT: sxtb r2, r1 +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: csel r0, r1, r0, gt +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %cmp = icmp sgt i8 %max, %x + %1 = select i1 %cmp, i8 %x, i8 %max + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i8 @inverted_uminv16i8(<16 x i8> %vec, i8 zeroext %min) { +; CHECK-LABEL: inverted_uminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) + %cmp = icmp ugt i8 %x, %min + %1 = select i1 %cmp, i8 %min, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @inverted_uminv8i16(<8 x i16> %vec, i16 zeroext %min) { +; CHECK-LABEL: inverted_uminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) + %cmp = icmp ugt i16 %x, %min + %1 = select i1 %cmp, i16 %min, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @inverted_uminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: inverted_uminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) + %cmp = icmp ugt i32 %x, %min + %1 = select i1 %cmp, i32 %min, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @inverted_sminv16i8(<16 x i8> %vec, i8 signext %min) { +; CHECK-LABEL: inverted_sminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) + %cmp = icmp sgt i8 %x, %min + %1 = select i1 %cmp, i8 %min, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @inverted_sminv8i16(<8 x i16> %vec, i16 signext %min) { +; CHECK-LABEL: inverted_sminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) + %cmp = icmp sgt i16 %x, %min + %1 = select i1 %cmp, i16 %min, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @inverted_sminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: inverted_sminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) + %cmp = icmp sgt i32 %x, %min + %1 = select i1 %cmp, i32 %min, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc zeroext i8 @inverted_umaxv16i8(<16 x i8> %vec, i8 zeroext %max) { +; CHECK-LABEL: inverted_umaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) + %cmp = icmp ult i8 %x, %max + %1 = select i1 %cmp, i8 %max, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @inverted_umaxv8i16(<8 x i16> %vec, i16 zeroext %max) { +; CHECK-LABEL: inverted_umaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 
@llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %cmp = icmp ult i16 %x, %max + %1 = select i1 %cmp, i16 %max, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @inverted_umaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: inverted_umaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) + %cmp = icmp ult i32 %x, %max + %1 = select i1 %cmp, i32 %max, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @inverted_smaxv16i8(<16 x i8> %vec, i8 signext %max) { +; CHECK-LABEL: inverted_smaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %cmp = icmp slt i8 %x, %max + %1 = select i1 %cmp, i8 %max, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @inverted_smaxv8i16(<8 x i16> %vec, i16 signext %max) { +; CHECK-LABEL: inverted_smaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %cmp = icmp slt i16 %x, %max + %1 = select i1 %cmp, i16 %max, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @inverted_smaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: inverted_smaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) + %cmp = icmp slt i32 %x, %max + %1 = select i1 %cmp, i32 %max, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i16 @trunc_and_sext(<8 x i16> %vec, i32 %max) #1 { +; CHECK-LABEL: trunc_and_sext: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #32768 +; CHECK-NEXT: movt r1, #65535 +; CHECK-NEXT: vmaxv.s16 r1, q0 +; CHECK-NEXT: sxth r2, r1 +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: csel r0, r0, r1, gt +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %xs = sext i16 %x to i32 + %cmp = icmp sgt i32 %max, %xs + %mt = trunc i32 %max to i16 + %1 = select i1 %cmp, i16 %mt, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc signext i16 @trunc_and_zext(<8 x i16> %vec, i32 %max) #1 { +; CHECK-LABEL: trunc_and_zext: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: vmaxv.u16 r1, q0 +; CHECK-NEXT: uxth r2, r1 +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: csel r0, r0, r1, gt +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %xs = zext i16 %x to i32 + %cmp = icmp sgt i32 %max, %xs + %mt = trunc i32 %max to i16 + %1 = select i1 %cmp, i16 %mt, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) { +; CHECK-LABEL: uminv2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r12, s3 +; CHECK-NEXT: vmov lr, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r4, r2, r3, lo +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: csel r2, r2, r3, lo +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r5, r2, r4, eq +; CHECK-NEXT: csel r3, lr, r12, lo +; CHECK-NEXT: subs r2, r5, r0 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r2, r3, r1 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r5, r0, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: pop {r4, r5, r7, pc} + 
%x = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec) + %cmp = icmp ult i64 %x, %min + %1 = select i1 %cmp, i64 %x, i64 %min + ret i64 %1 +} + +define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) { +; CHECK-LABEL: sminv2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r12, s3 +; CHECK-NEXT: vmov lr, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r4, r2, r3, lt +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: csel r2, r2, r3, lo +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r5, r2, r4, eq +; CHECK-NEXT: csel r3, lr, r12, lt +; CHECK-NEXT: subs r2, r5, r0 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r2, r3, r1 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r5, r0, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: pop {r4, r5, r7, pc} + %x = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec) + %cmp = icmp slt i64 %x, %min + %1 = select i1 %cmp, i64 %x, i64 %min + ret i64 %1 +} + +define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) { +; CHECK-LABEL: umaxv2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r12, s3 +; CHECK-NEXT: vmov lr, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r4, r2, r3, hi +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: csel r2, r2, r3, hi +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r5, r2, r4, eq +; CHECK-NEXT: csel r3, lr, r12, hi +; CHECK-NEXT: subs r2, r0, r5 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r2, r1, r3 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r5, r0, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: pop {r4, r5, r7, pc} + %x = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec) + %cmp = icmp ugt i64 %x, %max + %1 = select i1 %cmp, i64 %x, i64 %max + ret i64 %1 +} + +define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) { +; CHECK-LABEL: smaxv2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r12, s3 +; CHECK-NEXT: vmov lr, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r4, r2, r3, gt +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: csel r2, r2, r3, hi +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r5, r2, r4, eq +; CHECK-NEXT: csel r3, lr, r12, gt +; CHECK-NEXT: subs r2, r0, r5 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r2, r1, r3 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r5, r0, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: pop {r4, r5, r7, pc} + %x = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec) + %cmp = icmp sgt i64 %x, %max + %1 = select i1 %cmp, i64 %x, i64 %max + ret i64 %1 +} + +declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) + +declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) + +declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) + +declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>) + +declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) + +declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) + +declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) + +declare i64 
@llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
+
+declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
+
+declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
+
+declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
+
+declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
+
+declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
+
+declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
+
+declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
+
+declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)

From 34b61d6cd58aea88d44fef9fa4f409f2e324a4d4 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson
Date: Fri, 25 Sep 2020 18:34:17 +0200
Subject: [PATCH 028/321] [SystemZ] Add support for .insn directives for
 vector instructions.

Support VRI, VRR, VRS, VRV, VRX, VSI instruction formats with the .insn
directive.

Review: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D88357
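
As an illustration (not part of the patch: the file and function names are
hypothetical, and the encoding constant is the VRX opcode exercised by the
new directive-insn-vector.s test below), the new forms can be reached from C
via basic inline assembly on an s390x target:

  /* insn_vrx_example.c -- illustrative sketch for an s390x target with the
     vector facility.  0xe70000000006 is the VL (vector load) opcode used in
     the new test; the registers are chosen arbitrarily and no clobbers are
     declared, so this is meant for inspection with -S, not production use. */
  void emit_vl_via_insn(void) {
    /* Assembles to: e7 20 2f ff 30 06   vl %v2, 4095(%r2), 3 */
    __asm__(".insn vrx,0xe70000000006,%v2,4095(%r2),3");
  }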
---
 .../SystemZ/AsmParser/SystemZAsmParser.cpp    | 21 +++++++-
 .../lib/Target/SystemZ/SystemZInstrFormats.td | 49 +++++++++++++++++++
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td   | 25 ++++++++++
 llvm/test/MC/SystemZ/directive-insn-vector.s  | 27 ++++++++++
 4 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/MC/SystemZ/directive-insn-vector.s

diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index d5a3a19446c7a..be96612383c7d 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -565,7 +565,7 @@ struct InsnMatchEntry {
   StringRef Format;
   uint64_t Opcode;
   int32_t NumOperands;
-  MatchClassKind OperandKinds[5];
+  MatchClassKind OperandKinds[7];
 };
 
 // For equal_range comparison.
@@ -633,7 +633,20 @@ static struct InsnMatchEntry InsnMatchTable[] = {
   { "sse", SystemZ::InsnSSE, 3,
     { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12 } },
   { "ssf", SystemZ::InsnSSF, 4,
-    { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } }
+    { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } },
+  { "vri", SystemZ::InsnVRI, 6,
+    { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_U12Imm, MCK_U4Imm, MCK_U4Imm } },
+  { "vrr", SystemZ::InsnVRR, 7,
+    { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_AnyReg, MCK_U4Imm, MCK_U4Imm,
+      MCK_U4Imm } },
+  { "vrs", SystemZ::InsnVRS, 5,
+    { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp12, MCK_U4Imm } },
+  { "vrv", SystemZ::InsnVRV, 4,
+    { MCK_U48Imm, MCK_AnyReg, MCK_BDVAddr64Disp12, MCK_U4Imm } },
+  { "vrx", SystemZ::InsnVRX, 4,
+    { MCK_U48Imm, MCK_AnyReg, MCK_BDXAddr64Disp12, MCK_U4Imm } },
+  { "vsi", SystemZ::InsnVSI, 4,
+    { MCK_U48Imm, MCK_AnyReg, MCK_BDAddr64Disp12, MCK_U8Imm } }
 };
 
 static void printMCExpr(const MCExpr *E, raw_ostream &OS) {
@@ -1199,6 +1212,8 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
       ResTy = parseBDXAddr64(Operands);
     else if (Kind == MCK_BDAddr64Disp12 || Kind == MCK_BDAddr64Disp20)
       ResTy = parseBDAddr64(Operands);
+    else if (Kind == MCK_BDVAddr64Disp12)
+      ResTy = parseBDVAddr64(Operands);
     else if (Kind == MCK_PCRel32)
       ResTy = parsePCRel32(Operands);
     else if (Kind == MCK_PCRel16)
@@ -1243,6 +1258,8 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
       ZOperand.addBDAddrOperands(Inst, 2);
     else if (ZOperand.isMem(BDXMem))
       ZOperand.addBDXAddrOperands(Inst, 3);
+    else if (ZOperand.isMem(BDVMem))
+      ZOperand.addBDVAddrOperands(Inst, 3);
     else if (ZOperand.isImm())
       ZOperand.addImmOperands(Inst, 1);
     else
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 50f1e09c6ee51..4aac2eec655fb 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -1764,6 +1764,55 @@ class DirectiveInsnSSF<dag outs, dag ins, string asmstr, list<dag> pattern>
   let Inst{35-32} = enc{35-32};
 }
 
+class DirectiveInsnVRI<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstVRIe<0, outs, ins, asmstr, pattern> {
+  bits<48> enc;
+
+  let Inst{47-40} = enc{47-40};
+  let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnVRR<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstVRRc<0, outs, ins, asmstr, pattern> {
+  bits<48> enc;
+
+  let Inst{47-40} = enc{47-40};
+  let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnVRS<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstVRSc<0, outs, ins, asmstr, pattern> {
+  bits<48> enc;
+
+  let Inst{47-40} = enc{47-40};
+  let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnVRV<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstVRV<0, outs, ins, asmstr, pattern> {
+  bits<48> enc;
+
+  let Inst{47-40} = enc{47-40};
+  let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnVRX<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstVRX<0, outs, ins, asmstr, pattern> {
+  bits<48> enc;
+
+  let Inst{47-40} = enc{47-40};
+  let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnVSI<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstVSI<0, outs, ins, asmstr, pattern> {
+  bits<48> enc;
+
+  let Inst{47-40} = enc{47-40};
+  let Inst{7-0} = enc{7-0};
+}
+
+
 //===----------------------------------------------------------------------===//
 // Variants of instructions with condition mask
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index a2e207aedcde2..00a0d484cc2b9 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -2246,6 +2246,31 @@ let isCodeGenOnly = 1, hasSideEffects = 1 in {
                 (ins imm64zx48:$enc, bdaddr12only:$BD1, bdaddr12only:$BD2,
                      AnyReg:$R3),
                 ".insn ssf,$enc,$BD1,$BD2,$R3", []>;
+  def InsnVRI : DirectiveInsnVRI<(outs),
+                (ins imm64zx48:$enc, VR128:$V1, VR128:$V2,
+                     imm32zx12:$I3, imm32zx4:$M4, imm32zx4:$M5),
+                ".insn vri,$enc,$V1,$V2,$I3,$M4,$M5", []>;
+  def InsnVRR : DirectiveInsnVRR<(outs),
+                (ins imm64zx48:$enc, VR128:$V1, VR128:$V2,
+                     VR128:$V3, imm32zx4:$M4, imm32zx4:$M5,
+                     imm32zx4:$M6),
+                ".insn vrr,$enc,$V1,$V2,$V3,$M4,$M5,$M6", []>;
+  def InsnVRS : DirectiveInsnVRS<(outs),
+                (ins imm64zx48:$enc, AnyReg:$R1, VR128:$V3,
+                     bdaddr12only:$BD2, imm32zx4:$M4),
+                ".insn vrs,$enc,$R1,$V3,$BD2,$M4", []>;
+  def InsnVRV : DirectiveInsnVRV<(outs),
+                (ins imm64zx48:$enc, VR128:$V1,
+                     bdvaddr12only:$VBD2, imm32zx4:$M3),
+                ".insn vrv,$enc,$V1,$VBD2,$M3", []>;
+  def InsnVRX : DirectiveInsnVRX<(outs),
+                (ins imm64zx48:$enc, VR128:$V1,
+                     bdxaddr12only:$XBD2, imm32zx4:$M3),
+                ".insn vrx,$enc,$V1,$XBD2,$M3", []>;
+  def InsnVSI : DirectiveInsnVSI<(outs),
+                (ins imm64zx48:$enc, VR128:$V1,
+                     bdaddr12only:$BD2, imm32zx8:$I3),
+                ".insn vsi,$enc,$V1,$BD2,$I3", []>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/SystemZ/directive-insn-vector.s b/llvm/test/MC/SystemZ/directive-insn-vector.s
new file mode 100644
index 0000000000000..04c53a8bbf85b
--- /dev/null
+++ b/llvm/test/MC/SystemZ/directive-insn-vector.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -filetype=obj %s | \
+# RUN:   llvm-objdump --mcpu=z14 -d - | FileCheck %s

+# Test the .insn directive for vector instructions.
+
+#CHECK: e7 23 2f ff 10 13 vgef %v2, 4095(%v3,%r2), 1
+ .insn vrv,0xe70000000013,%v2,4095(%v3,%r2),1
+
+#CHECK: e7 56 ff f1 20 4a vftci %v5, %v6, 4095, 2, 1
+ .insn vri,0xe7000000004a,%v5,%v6,4095,2,1
+
+#CHECK: e7 20 2f ff 30 06 vl %v2, 4095(%r2), 3
+ .insn vrx,0xe70000000006,%v2,4095(%r2),3
+
+#CHECK: e7 16 00 01 00 21 vlgvb %r1, %v6, 1
+ .insn vrs,0xe70000003021,%r1,%v6,1(%r0),0
+#CHECK: e7 16 00 00 30 21 vlgvg %r1, %v6, 0
+ .insn vrs,0xe70000003021,%r1,%v6,0(%r0),3
+
+#CHECK: e7 37 00 00 00 56 vlr %v3, %v7
+ .insn vrr,0xe70000000056,%v3,%v7,0,0,0,0
+#CHECK: e7 37 60 18 30 eb wfchdbs %f3, %f7, %f6
+ .insn vrr,0xe700000000eb,%v3,%v7,%v6,3,8,1
+
+#CHECK: e6 0c 20 0c 01 35 vlrl %v16, 12(%r2), 12
+ .insn vsi,0xe60000000035,%v16,12(%r2),12

From 34b34e90fc3299debfda4add0e277f59b0a699da Mon Sep 17 00:00:00 2001
From: Joachim Protze
Date: Mon, 5 Oct 2020 13:30:22 +0200
Subject: [PATCH 029/321] [OpenMP][Tests] NFC: fix flaky test failure caused
 by rare scheduling

The worker thread can start executing the first task before the second
task has been created.

Fixes the spurious failure reported in https://reviews.llvm.org/D61657
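
For context, a minimal sketch of the racy schedule (illustrative only; it
assumes an OMPT tool logging task_create/task_schedule events as in the
test, and the identifiers are hypothetical):

  void work(void);

  void spawn_two_tasks(void) {
  #pragma omp parallel num_threads(2)
  #pragma omp master
    {
  #pragma omp task        /* task_create for task 0 fires here */
      work();             /* the idle worker may start task 0 at once, so
                             its task_schedule event can be logged before
                             the next task_create */
  #pragma omp task        /* task_create for task 1 */
      work();
    }
  }

Relaxing the second task_create check to CHECK-DAG tolerates that
interleaving.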
---
 openmp/runtime/test/ompt/tasks/task_memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openmp/runtime/test/ompt/tasks/task_memory.c b/openmp/runtime/test/ompt/tasks/task_memory.c
index a48cef22bf4cd..5a459b40857e5 100644
--- a/openmp/runtime/test/ompt/tasks/task_memory.c
+++ b/openmp/runtime/test/ompt/tasks/task_memory.c
@@ -96,7 +96,7 @@ ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
 // CHECK-SAME: memory_addr=[[NULL]], memory_size=0, result=0
 
 // CHECK: ompt_event_task_create: task_id=[[TASK_ID_0:[0-9]+]]
-// CHECK: ompt_event_task_create: task_id=[[TASK_ID_1:[0-9]+]]
+// CHECK-DAG: ompt_event_task_create: task_id=[[TASK_ID_1:[0-9]+]]
 
 // Expects non-zero address, size, and result
 // CHECK-DAG: ompt_event_task_schedule: task_id=[[TASK_ID_0]],

From eaf73293cb6b8d45dd85ffced57aea7ad4177754 Mon Sep 17 00:00:00 2001
From: Joseph Huber
Date: Sun, 4 Oct 2020 18:12:01 -0400
Subject: [PATCH 030/321] [OpenMP] Add Error Handling for Conflicting Pointer
 Sizes for Target Offload

Summary:
This patch adds an error to Clang that detects if OpenMP offloading is used
between two architectures with incompatible pointer sizes. This ensures that
the data mapping can be done correctly and fixes an issue where code
generation produced pointers of the wrong size.

This patch also adds a new lit substitution, %omp_powerpc_triple, which
expands to the 32-bit or 64-bit PowerPC triple to match the width of the
host system. This was required to fix some OpenMP tests that automatically
populated the target architecture.

Reviewers: jdoerfert

Subscribers: cfe-commits guansong sstefan1 yaxunl delcypher

Tags: OpenMP clang LLVM

Differential Revision: https://reviews.llvm.org/D88594
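
For illustration (not part of the patch: the invocation and file are a
hypothetical sketch, with the host/device pair mirroring the new test), a
mismatched pair now fails up front:

  /* offload.c -- on an x86_64 host, requesting a 32-bit offload target:
   *   clang -fopenmp -fopenmp-targets=i386-pc-linux-gnu offload.c
   * is now rejected with the new diagnostic:
   *   error: OpenMP target architecture 'i386-pc-linux-gnu' pointer size
   *   is incompatible with host 'x86_64-...'
   * instead of miscompiling the pointer-sized mapping arguments.
   */
  void offload(int *p) {
  #pragma omp target map(tofrom : p[0:1])
    { p[0] += 1; }
  }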
---
 .../clang/Basic/DiagnosticDriverKinds.td      |  1 +
 clang/lib/Frontend/CompilerInvocation.cpp     |  8 ++++++
 .../distribute_parallel_for_if_codegen.cpp    | 24 ++++++++---------
 ...ibute_parallel_for_num_threads_codegen.cpp | 24 ++++++++---------
 ...istribute_parallel_for_simd_if_codegen.cpp | 26 +++++++++----------
 ..._parallel_for_simd_num_threads_codegen.cpp | 24 ++++++++---------
 ...arallel_reduction_codegen_tbaa_PR46146.cpp |  4 +--
 ...get_incompatible_architecture_messages.cpp | 14 ++++++++++
 ...ams_distribute_parallel_for_if_codegen.cpp | 24 ++++++++---------
 ...istribute_parallel_for_simd_if_codegen.cpp | 26 +++++++++----------
 ...ams_distribute_parallel_for_if_codegen.cpp | 24 ++++++++---------
 ...istribute_parallel_for_simd_if_codegen.cpp | 26 +++++++++----------
 llvm/utils/lit/lit/llvm/config.py             |  2 ++
 13 files changed, 126 insertions(+), 101 deletions(-)
 create mode 100644 clang/test/OpenMP/target_incompatible_architecture_messages.cpp

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 3bf1bb19b7ae3..29bc19e5a84e5 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -253,6 +253,7 @@ def err_drv_optimization_remark_format : Error<
   "unknown remark serializer format: '%0'">;
 def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">;
 def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">;
+def err_drv_incompatible_omp_arch : Error<"OpenMP target architecture '%0' pointer size is incompatible with host '%1'">;
 def err_drv_omp_host_ir_file_not_found : Error<
   "The provided host compiler IR file '%0' is required to generate code for OpenMP target regions but cannot be found.">;
 def err_drv_omp_host_target_not_supported : Error<
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index b402f53cc765b..bbdf0e3be7ae0 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3206,6 +3206,14 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
               TT.getArch() == llvm::Triple::x86 ||
               TT.getArch() == llvm::Triple::x86_64))
           Diags.Report(diag::err_drv_invalid_omp_target) << A->getValue(i);
+        else if ((T.isArch64Bit() && TT.isArch32Bit()) ||
+                 (T.isArch64Bit() && TT.isArch16Bit()) ||
+                 (T.isArch32Bit() && TT.isArch64Bit()) ||
+                 (T.isArch32Bit() && TT.isArch16Bit()) ||
+                 (T.isArch16Bit() && TT.isArch32Bit()) ||
+                 (T.isArch16Bit() && TT.isArch64Bit()))
+          Diags.Report(diag::err_drv_incompatible_omp_arch)
+              << A->getValue(i) << T.str();
         else
           Opts.OMPTargetTriples.push_back(TT);
       }
diff --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
index c62832a2705fa..2b766f136d1d9 100644
--- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
@@ -1,19 +1,19 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 
-// RUN: %clang_cc1 -verify
-fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp index ba8545a297c0e..d72dab1832b89 100644 --- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp @@ -1,19 +1,19 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 
-fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp index 1e6f0c67247f8..219bad5ad7941 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -1,16 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp 
-fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 - -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix 
SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp index 09bc36388d69d..eeb7dd0caaf39 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp @@ -1,19 +1,19 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: 
{{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp index aefe00f1cadf9..031c7b6c778e4 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -O1 
-disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER diff --git a/clang/test/OpenMP/target_incompatible_architecture_messages.cpp b/clang/test/OpenMP/target_incompatible_architecture_messages.cpp new file mode 100644 index 0000000000000..f0f9d236d764d --- /dev/null +++ b/clang/test/OpenMP/target_incompatible_architecture_messages.cpp @@ -0,0 +1,14 @@ +// RUN: not %clang_cc1 -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -x c++ -fopenmp -triple i386-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -o - %s 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -o - %s 2>&1 | FileCheck %s +// CHECK: error: OpenMP target architecture '{{.+}}' pointer size is incompatible with host '{{.+}}' +#ifndef HEADER +#define HEADER + +void test() { +#pragma omp target + {} +} + +#endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp index 87538e4d50da2..1f6c96a3fad5c 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -1,19 +1,19 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 
-fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY // SIMD-ONLY-NOT: {{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | 
FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY // SIMD-ONLY-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp index 50de0a5e1dff3..107400192a24e 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -1,16 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 - -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple 
-emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY // SIMD-ONLY-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp index 55f76aa8aa423..3c2664b558fe8 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -1,19 +1,19 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x 
c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp index 2f37f23775804..0afb9c9b7f1db 100644 --- 
a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -1,16 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 - -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s 
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index c8013945e3f93..6fd7c4434e3ae 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -456,6 +456,8 @@ def use_clang(self, additional_tool_dirs=[], additional_flags=[], required=True) self.make_itanium_abi_triple(self.config.target_triple))) self.config.substitutions.append(('%ms_abi_triple', self.make_msabi_triple(self.config.target_triple))) + self.config.substitutions.append(('%omp_powerpc_triple', + 'powerpc' + str(sys.hash_info.width) + 'le-ibm-linux-gnu')) self.config.substitutions.append( ('%resource_dir', builtin_include_dir)) From 89e8a8b223b2e20bb63e930ddb78cb80a3ed45a2 Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Mon, 5 Oct 2020 08:12:54 -0700 Subject: [PATCH 031/321] Revert SVML support for sqrt As was brought up in D87169 by @craig.topper we shouldn't map llvm.sqrt to svml since there is a faster native instruction. 
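To make the reasoning concrete, here is a C++ sketch for illustration (not part of this patch; the function name and compiler flags are assumptions): built with something like `clang -O2 -fno-math-errno -mavx`, the loop below is expected to be auto-vectorized through `llvm.sqrt.f64` and lowered directly to the native `vsqrtpd` instruction, with no `__svml_sqrt*` library call in the generated code.

    #include <cmath>

    // Illustrative kernel: the loop vectorizer widens std::sqrt (lowered
    // via llvm.sqrt.f64), and x86 codegen can emit vsqrtpd for the widened
    // intrinsic directly, so routing it through SVML would only add call
    // overhead.
    void sqrt_each(const double *in, double *out, int n) {
      for (int i = 0; i < n; ++i)
        out[i] = std::sqrt(in[i]);
    }

The native instruction is documented here: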
https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_p&expand=5824,5823,5356,5823,5825,5365,5356 Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D88620 --- llvm/include/llvm/Analysis/VecFuncs.def | 8 ---- .../LoopVectorize/X86/svml-calls.ll | 48 ------------------- 2 files changed, 56 deletions(-) diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index a47ee3c147252..d3c8973b15ad2 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -309,14 +309,6 @@ TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4) TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8) TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16) -TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt2", 2) -TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt4", 4) -TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt8", 8) - -TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf4", 4) -TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf8", 8) -TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf16", 16) - TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2) TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4) TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8) diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll index da6b4696ba2ba..42c280df6ad02 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll @@ -40,8 +40,6 @@ declare float @llvm.log10.f32(float) #0 declare double @sqrt(double) #0 declare float @sqrtf(float) #0 -declare double @llvm.sqrt.f64(double) #0 -declare float @llvm.sqrt.f32(float) #0 declare double @exp2(double) #0 declare float @exp2f(float) #0 @@ -746,52 +744,6 @@ for.end: ret void } -define void @sqrt_f64_intrinsic(double* nocapture %varray) { -; CHECK-LABEL: @sqrt_f64_intrinsic( -; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]]) -; CHECK: ret void -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %tmp = trunc i64 %iv to i32 - %conv = sitofp i32 %tmp to double - %call = tail call double @llvm.sqrt.f64(double %conv) - %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv - store double %call, double* %arrayidx, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -define void @sqrt_f32_intrinsic(float* nocapture %varray) { -; CHECK-LABEL: @sqrt_f32_intrinsic( -; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]]) -; CHECK: ret void -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %tmp = trunc i64 %iv to i32 - %conv = sitofp i32 %tmp to float - %call = tail call float @llvm.sqrt.f32(float %conv) - %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv - store float %call, float* %arrayidx, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - define void @exp2_f64(double* nocapture %varray) { ; CHECK-LABEL: @exp2_f64( ; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) From c3f12dd6069ebfc913953a4a29e9db5dd4749160 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 5 Oct 2020 10:21:57 -0500 Subject: [PATCH 032/321] [docs] 
Revise loop terminology reference.

Motivated by D88183, this seeks to clarify the current loop nomenclature
with added illustrations, examples for possibly unexpected situations
(infinite loops not part of the "parent" loop, logical loops sharing the
same header, ...), and clarification on what other sources may consider
a loop.

The current document also has multiple errors that are fixed here.
Some selected errors:

 * Loops are defined as strongly-connected components. A component is a
   partition of all nodes, i.e. a subloop can never be a component. That
   is, the document as it currently is only covers top-level loops, even
   though it also uses the term SCC for subloops.
 * "a block can be the header of two separate loops at the same time"
   (it is considered a single loop by LoopInfo)
 * "execute before some interesting event happens" (some interesting
   event is not well-defined)

Reviewed By: baziotis, Whitney

Differential Revision: https://reviews.llvm.org/D88408
---
 llvm/docs/LangRef.rst          |    2 +
 llvm/docs/LoopTerminology.rst  |  314 +++--
 llvm/docs/loop-guard.svg       | 1079 ++++++++++++++++
 llvm/docs/loop-irreducible.svg |  772 ++++++++++++
 llvm/docs/loop-merge.svg       |  660 ++++++++++
 llvm/docs/loop-nested.svg      |  874 +++++++++++++
 llvm/docs/loop-nonmaximal.svg  | 1280 +++++++++++++++++++
 llvm/docs/loop-separate.svg    |  690 +++++++++++
 llvm/docs/loop-single.svg      |  338 +++++
 llvm/docs/loop-terminology.svg | 2111 ++++++++++++++++++++++++++++++++
 10 files changed, 8024 insertions(+), 96 deletions(-)
 create mode 100644 llvm/docs/loop-guard.svg
 create mode 100644 llvm/docs/loop-irreducible.svg
 create mode 100644 llvm/docs/loop-merge.svg
 create mode 100644 llvm/docs/loop-nested.svg
 create mode 100644 llvm/docs/loop-nonmaximal.svg
 create mode 100644 llvm/docs/loop-separate.svg
 create mode 100644 llvm/docs/loop-single.svg
 create mode 100644 llvm/docs/loop-terminology.svg

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index a31b5304d66b3..cd5c9e23b746d 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -20066,6 +20066,8 @@ not overflow at link time under the medium code model if ``x`` is an
 a constant initializer folded into a function body. This intrinsic can be
 used to avoid the possibility of overflows when loading from such a
 constant.

+.. _llvm_sideeffect:
+
 '``llvm.sideeffect``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/llvm/docs/LoopTerminology.rst b/llvm/docs/LoopTerminology.rst
index 0870ab9fcac13..ec925213c3d67 100644
--- a/llvm/docs/LoopTerminology.rst
+++ b/llvm/docs/LoopTerminology.rst
@@ -7,119 +7,241 @@ LLVM Loop Terminology (and Canonical Forms)
 .. contents::
    :local:

-Introduction
-============
+Loop Definition
+===============

-Loops are a core concept in any optimizer. This page spells out some
-of the common terminology used within LLVM code to describe loop
-structures.
+Loops are an important concept for a code optimizer. In LLVM, detection
+of loops in a control-flow graph is done by :ref:`loopinfo`. It is based
+on the following definition.

-First, let's start with the basics. In LLVM, a Loop is a maximal set of basic
-blocks that form a strongly connected component (SCC) in the Control
-Flow Graph (CFG) where there exists a dedicated entry/header block that
-dominates all other blocks within the loop. Thus, without leaving the
-loop, one can reach every block in the loop from the header block and
-the header block from every block in the loop.
+A loop is a subset of nodes from the control-flow graph (CFG; where
+nodes represent basic blocks) with the following properties:

-Note that there are some important implications of this definition:
+1. The induced subgraph (which is the subgraph that contains all the
+   edges from the CFG within the loop) is strongly connected
+   (every node is reachable from all others).

-* Not all SCCs are loops. There exist SCCs that do not meet the
-  dominance requirement and such are not considered loops.
+2. All edges from outside the subset into the subset point to the same
+   node, called the **header**. As a consequence, the header dominates
+   all nodes in the loop (i.e. every execution path to any of the loop's
+   nodes will have to pass through the header).

-* Loops can contain non-loop SCCs and non-loop SCCs may contain
-  loops. Loops may also contain sub-loops.
+3. The loop is the maximal subset with these properties. That is, no
+   additional nodes from the CFG can be added such that the induced
+   subgraph would still be strongly connected and the header would
+   remain the same.

-* A header block is uniquely associated with one loop. There can be
-  multiple SCC within that loop, but the strongly connected component
-  (SCC) formed from their union must always be unique.
+In computer science literature, this is often called a *natural loop*.
+In LLVM, this is the only definition of a loop.

-* Given the use of dominance in the definition, all loops are
-  statically reachable from the entry of the function.

-* Every loop must have a header block, and some set of predecessors
-  outside the loop. A loop is allowed to be statically infinite, so
-  there need not be any exiting edges.
+Terminology
+-----------

-* Any two loops are either fully disjoint (no intersecting blocks), or
-  one must be a sub-loop of the other.
+The definition of a loop comes with some additional terminology:

-* Loops in a function form a forest. One implication of this fact
-  is that a loop either has no parent or a single parent.
+* An **entering block** (or **loop predecessor**) is a non-loop node
+  that has an edge into the loop (necessarily the header). If there is
+  only one entering block, and its only edge is to the
+  header, it is also called the loop's **preheader**. The preheader
+  dominates the loop without itself being part of the loop.

-A loop may have an arbitrary number of exits, both explicit (via
-control flow) and implicit (via throwing calls which transfer control
-out of the containing function). There is no special requirement on
-the form or structure of exit blocks (the block outside the loop which
-is branched to). They may have multiple predecessors, phis, etc...
+* A **latch** is a loop node that has an edge to the header.

-Key Terminology
-===============
+* A **backedge** is an edge from a latch to the header.
+
+* An **exiting edge** is an edge from inside the loop to a node outside
+  of the loop. The source of such an edge is called an **exiting block**;
+  its target is an **exit block**.
+
+.. image:: ./loop-terminology.svg
+   :width: 400 px
+
+
+Important Notes
+---------------
+
+This loop definition has some noteworthy consequences:
+
+* A node can be the header of at most one loop. As such, a loop can be
+  identified by its header. Due to the header being the only entry into
+  a loop, it can be called a Single-Entry-Multiple-Exits (SEME) region.
+
+
+* For basic blocks that are not reachable from the function's entry, the
+  concept of loops is undefined.
This follows from the concept of dominance being undefined as well.
+
+
+* The smallest loop consists of a single basic block that branches to
+  itself. In this case that block is the header and latch (and an exiting
+  block if it has another edge to a different block) at the same time.
+  A single block that has no branch to itself is not considered a loop,
+  even though it is trivially strongly connected.
+
+.. image:: ./loop-single.svg
+   :width: 300 px
+
+In this case, the roles of header, exiting block, and latch all fall to
+the same node. :ref:`loopinfo` reports this as:
+
+.. code-block:: console
+
+  $ opt input.ll -loops -analyze
+  Loop at depth 1 containing: %for.body
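For a concrete source-level analogue (an illustrative sketch, not part of the patch): a `do`/`while` whose body contains no internal control flow is typically emitted as exactly this kind of single self-looping block.

    // Illustrative only: assuming the body stays a single basic block,
    // that block is simultaneously the loop's header, its latch (the
    // backedge goes from the block back to itself), and its exiting block.
    int count_down(int n) {
      do {
        --n;
      } while (n > 0);
      return n;
    }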
-**Header Block** - The basic block which dominates all other blocks -contained within the loop. As such, it is the first one executed if -the loop executes at all. Note that a block can be the header of -two separate loops at the same time, but only if one is a sub-loop -of the other. - -**Exiting Block** - A basic block contained within a given loop which has -at least one successor outside of the loop and one successor inside the -loop. (The latter is a consequence of the block being contained within -an SCC which is part of the loop.) That is, it has a successor which -is an Exit Block. - -**Exit Block** - A basic block outside of the associated loop which has a -predecessor inside the loop. That is, it has a predecessor which is -an Exiting Block. - -**Latch Block** - A basic block within the loop whose successors include -the header block of the loop. Thus, a latch is a source of backedge. -A loop may have multiple latch blocks. A latch block may be either -conditional or unconditional. - -**Backedge(s)** - The edge(s) in the CFG from latch blocks to the header -block. Note that there can be multiple such edges, and even multiple -such edges leaving a single latch block. - -**Loop Predecessor** - The predecessor blocks of the loop header which -are not contained by the loop itself. These are the only blocks -through which execution can enter the loop. When used in the -singular form implies that there is only one such unique block. - -**Preheader Block** - A preheader is a (singular) loop predecessor which -ends in an unconditional transfer of control to the loop header. Note -that not all loops have such blocks. - -**Backedge Taken Count** - The number of times the backedge will execute -before some interesting event happens. Commonly used without -qualification of the event as a shorthand for when some exiting block -branches to some exit block. May be zero, or not statically computable. - -**Iteration Count** - The number of times the header will execute before -some interesting event happens. Commonly used without qualification to -refer to the iteration count at which the loop exits. Will always be -one greater than the backedge taken count. *Warning*: Preceding -statement is true in the *integer domain*; if you're dealing with fixed -width integers (such as LLVM Values or SCEVs), you need to be cautious -of overflow when converting one to the other. - -It's important to note that the same basic block can play multiple -roles in the same loop, or in different loops at once. For example, a -single block can be the header for two nested loops at once, while -also being an exiting block for the inner one only, and an exit block -for a sibling loop. Example: + +* Loops can be nested inside each other. That is, a loop's node set can + be a subset of another loop with a different loop header. The loop + hierarchy in a function forms a forest: Each top-level loop is the + root of the tree of the loops nested inside it. + +.. image:: ./loop-nested.svg + :width: 350 px + + +* It is not possible that two loops share only a few of their nodes. + Two loops are either disjoint or one is nested inside the other. In + the example below the left and right subsets both violate the + maximality condition. Only the merge of both sets is considered a loop. + +.. image:: ./loop-nonmaximal.svg + :width: 250 px + + +* It is also possible that two logical loops share a header, but are + considered a single loop by LLVM: .. code-block:: C - while (..) { - for (..) 
{}
-    do {
-      do {
-        // <-- block of interest
-        if (exit) break;
-      } while (..);
-    } while (..)
+  for (int i = 0; i < 128; ++i)
+    for (int j = 0; j < 128; ++j)
+      body(i,j);
+
+which might be represented in LLVM-IR as follows. Note that there is
+only a single header and hence just a single loop.
+
+.. image:: ./loop-merge.svg
+   :width: 400 px
+
+The :ref:`LoopSimplify <loop-terminology-loop-simplify>` pass will
+detect the loop and ensure separate headers for the outer and inner loop.
+
+.. image:: ./loop-separate.svg
+   :width: 400 px
+
+* A cycle in the CFG does not imply there is a loop. The example below
+  shows such a CFG, where there is no header node that dominates all
+  other nodes in the cycle. This is called **irreducible control-flow**.
+
+.. image:: ./loop-irreducible.svg
+   :width: 150 px
+
+The term reducible results from the ability to collapse the CFG into a
+single node by successively replacing one of three base structures with
+a single node: A sequential execution of basic blocks, a conditional
+branching (or switch) with re-joining, and a basic block looping on itself.
+`Wikipedia <https://en.wikipedia.org/wiki/Control-flow_graph#Reducibility>`_
+has a more formal definition, which basically says that every cycle has
+a dominating header.
+
+
+* Irreducible control-flow can occur at any level of the loop nesting.
+  That is, a loop that itself does not contain any loops can still have
+  cyclic control flow in its body; a loop that is not nested inside
+  another loop can still be part of an outer cycle; and there can be
+  additional cycles between any two loops where one is contained in the other.
+
+
+* Exiting edges are not the only way to break out of a loop. Other
+  possibilities are unreachable terminators, [[noreturn]] functions,
+  exceptions, signals, and your computer's power button.
+
+
+* A basic block "inside" the loop that does not have a path back to the
+  loop (i.e. to a latch or header) is not considered part of the loop.
+  This is illustrated by the following code.
+
+.. code-block:: C
+
+  for (unsigned i = 0; i <= n; ++i) {
+    if (c1) {
+      // When reaching this block, we will have exited the loop.
+      do_something();
+      break;
+    }
+    if (c2) {
+      // abort(), never returns, so we have exited the loop.
+      abort();
+    }
+    if (c3) {
+      // The unreachable allows the compiler to assume that this will not rejoin the loop.
+      do_something();
+      __builtin_unreachable();
+    }
+    if (c4) {
+      // This statically infinite loop is not nested because control-flow will not continue with the for-loop.
+      while(true) {
+        do_something();
+      }
+    }
+  }
+
+* There is no requirement for the control flow to eventually leave the
+  loop, i.e. a loop can be infinite. A **statically infinite loop** is a
+  loop that has no exiting edges. A **dynamically infinite loop** has
+  exiting edges, but it is possible that they are never taken. This may
+  happen only under some circumstances, such as when n == UINT_MAX in the
+  code below.
+
+.. code-block:: C
+
+  for (unsigned i = 0; i <= n; ++i)
+    body(i);
+
+It is possible for the optimizer to turn a dynamically infinite loop
+into a statically infinite loop, for instance when it can prove that the
+exiting condition is always false. Because the exiting edge is never
+taken, the optimizer can change the conditional branch into an
+unconditional one.
+
+Note that under some circumstances the compiler may assume that a loop will
+eventually terminate without proving it. For instance, it may remove a loop
+that does not do anything in its body. If the loop was infinite, this
+optimization resulted in an "infinite" performance speed-up.
A call
+to the intrinsic :ref:`llvm.sideeffect <llvm_sideeffect>` can be added
+into the loop to ensure that the optimizer does not make this assumption
+without proof.
+
+
+* The number of executions of the loop header before leaving the loop is
+  the **loop trip count** (or **iteration count**). If the loop should
+  not be executed at all, a **loop guard** must skip the entire loop:
+
+.. image:: ./loop-guard.svg
+   :width: 500 px
+
+Since the first thing a loop header might do is to check whether there
+is another execution and if not, immediately exit without doing any work
+(also see :ref:`loop-terminology-loop-rotate`), loop trip count is not
+the best measure of a loop's number of iterations. For instance, the
+number of header executions of the code below for a non-positive n
+(before loop rotation) is 1, even though the loop body is not executed
+at all.
+
+.. code-block:: C
+
+  for (int i = 0; i < n; ++i)
+    body(i);
+
+A better measure is the **backedge-taken count**, which is the number of
+times any of the backedges is taken before leaving the loop. It is one
+less than the trip count for executions that enter the header.
+
+
+.. _loopinfo:
+
 LoopInfo
 ========

@@ -139,7 +261,7 @@ are important for working successfully with this interface.
   be removed from LoopInfo. If this can not be done for some reason, then
   the optimization is *required* to preserve the static reachability of
   the loop.
-
+
 .. _loop-terminology-loop-simplify:

diff --git a/llvm/docs/loop-guard.svg b/llvm/docs/loop-guard.svg
new file mode 100644
index 0000000000000..dbb930f0573e2
--- /dev/null
+++ b/llvm/docs/loop-guard.svg
@@ -0,0 +1,1079 @@
[1079 lines of SVG markup for the loop-guard diagram elided]
diff --git a/llvm/docs/loop-irreducible.svg b/llvm/docs/loop-irreducible.svg
new file mode 100644
index 0000000000000..f9e588916270b
--- /dev/null
+++ b/llvm/docs/loop-irreducible.svg
@@ -0,0 +1,772 @@
[772 lines of SVG markup for the irreducible control-flow diagram elided]
diff --git a/llvm/docs/loop-merge.svg b/llvm/docs/loop-merge.svg
new file mode 100644
index 0000000000000..ea5e574b2d51d
--- /dev/null
+++ b/llvm/docs/loop-merge.svg
@@ -0,0 +1,660 @@
[660 lines of SVG markup for the merged-loops diagram elided]
diff --git a/llvm/docs/loop-nested.svg b/llvm/docs/loop-nested.svg
new file mode 100644
index 0000000000000..372a3b43ec481
--- /dev/null
+++ b/llvm/docs/loop-nested.svg
@@ -0,0 +1,874 @@
[874 lines of SVG markup for the nested-loops diagram elided]
diff --git a/llvm/docs/loop-nonmaximal.svg b/llvm/docs/loop-nonmaximal.svg
new file mode 100644
index 0000000000000..cb6e4d64b5030
--- /dev/null
+++ b/llvm/docs/loop-nonmaximal.svg
@@ -0,0 +1,1280 @@
[1280 lines of SVG markup for the non-maximal-loops diagram elided]
diff --git a/llvm/docs/loop-separate.svg b/llvm/docs/loop-separate.svg
new file mode 100644
index 0000000000000..fafd3eb42aeb5
--- /dev/null
+++ b/llvm/docs/loop-separate.svg
@@ -0,0 +1,690 @@
[690 lines of SVG markup for the separated-loops diagram elided]
diff --git a/llvm/docs/loop-single.svg b/llvm/docs/loop-single.svg
new file mode 100644
index 0000000000000..6f9720a0dcfdd
--- /dev/null
+++ b/llvm/docs/loop-single.svg
@@ -0,0 +1,338 @@
[338 lines of SVG markup for the single-block-loop diagram elided]
diff --git a/llvm/docs/loop-terminology.svg b/llvm/docs/loop-terminology.svg
new file mode 100644
index 0000000000000..6bed1733bb8a6
--- /dev/null
+++ b/llvm/docs/loop-terminology.svg
@@ -0,0 +1,2111 @@
[2111 lines of SVG markup for the loop-terminology diagram elided]
From 665371d0b29910d7fba618a707d6b732e2037ee2 Mon Sep 17 00:00:00 2001
From: Christian Sigg
Date: Thu, 1 Oct 2020 21:51:54 +0200
Subject: [PATCH 033/321] [mlir] Split alloc-like op LLVM lowerings into base
 and separate derived classes.

The previous code did the lowering to alloca, malloc, and aligned_malloc
in a single class with different code paths that are somewhat difficult
to follow. This change moves the common code to a base class and has a
separate derived class per lowering target that contains the specifics.

Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D88696
---
 .../StandardToLLVM/ConvertStandardToLLVM.h |  14 +
 .../StandardToLLVM/StandardToLLVM.cpp      | 554 +++++++++---------
 .../convert-dynamic-memref-ops.mlir        |  21 +-
 .../convert-static-memref-ops.mlir         |  33 +-
 4 files changed, 328 insertions(+), 294 deletions(-)

diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
index d98a0ff6efb36..645f4cd265810 100644
--- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
+++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
@@ -412,6 +412,7 @@ class ConvertToLLVMPattern : public ConversionPattern {
                        LLVMTypeConverter &typeConverter,
                        PatternBenefit benefit = 1);

+protected:
   /// Returns the LLVM dialect.
   LLVM::LLVMDialect &getDialect() const;

@@ -419,6 +420,10 @@ class ConvertToLLVMPattern : public ConversionPattern {
   /// defined by the used type converter.
   LLVM::LLVMType getIndexType() const;

+  /// Gets the MLIR type wrapping the LLVM integer type whose bit width
+  /// corresponds to that of an LLVM pointer type.
+  LLVM::LLVMType getIntPtrType(unsigned addressSpace = 0) const;
+
   /// Gets the MLIR type wrapping the LLVM void type.
   LLVM::LLVMType getVoidType() const;

@@ -470,6 +475,15 @@ class ConvertToLLVMPattern : public ConversionPattern {
                                    ArrayRef<Value> shape,
                                    ConversionPatternRewriter &rewriter) const;

+  /// Creates and populates the memref descriptor struct given all its fields.
+  /// 'strides' can be either dynamic (kDynamicStrideOrOffset) or static, but
+  /// not a mix of the two.
+  MemRefDescriptor
+  createMemRefDescriptor(Location loc, MemRefType memRefType,
+                         Value allocatedPtr, Value alignedPtr, uint64_t offset,
+                         ArrayRef<int64_t> strides, ArrayRef<Value> sizes,
+                         ConversionPatternRewriter &rewriter) const;
+
 protected:
   /// Reference to the type converter, with potential extensions.
LLVMTypeConverter &typeConverter; diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 731eab0c28dfc..75d07f35d226f 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -872,6 +872,13 @@ LLVM::LLVMType ConvertToLLVMPattern::getIndexType() const { return typeConverter.getIndexType(); } +LLVM::LLVMType +ConvertToLLVMPattern::getIntPtrType(unsigned addressSpace) const { + return LLVM::LLVMType::getIntNTy( + &typeConverter.getContext(), + typeConverter.getPointerBitwidth(addressSpace)); +} + LLVM::LLVMType ConvertToLLVMPattern::getVoidType() const { return LLVM::LLVMType::getVoidTy(&typeConverter.getContext()); } @@ -911,12 +918,12 @@ Value ConvertToLLVMPattern::getStridedElementPtr( Value base = memRefDescriptor.alignedPtr(rewriter, loc); Value offsetValue = offset == MemRefType::getDynamicStrideOrOffset() ? memRefDescriptor.offset(rewriter, loc) - : this->createIndexConstant(rewriter, loc, offset); + : createIndexConstant(rewriter, loc, offset); for (int i = 0, e = indices.size(); i < e; ++i) { Value stride = strides[i] == MemRefType::getDynamicStrideOrOffset() ? memRefDescriptor.stride(rewriter, loc, i) - : this->createIndexConstant(rewriter, loc, strides[i]); + : createIndexConstant(rewriter, loc, strides[i]); Value additionalOffset = rewriter.create(loc, indices[i], stride); offsetValue = @@ -973,19 +980,69 @@ Value ConvertToLLVMPattern::getSizeInBytes( } Value ConvertToLLVMPattern::getCumulativeSizeInBytes( - Location loc, Type elementType, ArrayRef sizes, + Location loc, Type elementType, ArrayRef shape, ConversionPatternRewriter &rewriter) const { // Compute the total number of memref elements. Value cumulativeSizeInBytes = - sizes.empty() ? createIndexConstant(rewriter, loc, 1) : sizes.front(); - for (unsigned i = 1, e = sizes.size(); i < e; ++i) + shape.empty() ? createIndexConstant(rewriter, loc, 1) : shape.front(); + for (unsigned i = 1, e = shape.size(); i < e; ++i) cumulativeSizeInBytes = rewriter.create( - loc, getIndexType(), ArrayRef{cumulativeSizeInBytes, sizes[i]}); + loc, getIndexType(), ArrayRef{cumulativeSizeInBytes, shape[i]}); auto elementSize = this->getSizeInBytes(loc, elementType, rewriter); return rewriter.create( loc, getIndexType(), ArrayRef{cumulativeSizeInBytes, elementSize}); } +/// Creates and populates the memref descriptor struct given all its fields. +MemRefDescriptor ConvertToLLVMPattern::createMemRefDescriptor( + Location loc, MemRefType memRefType, Value allocatedPtr, Value alignedPtr, + uint64_t offset, ArrayRef strides, ArrayRef sizes, + ConversionPatternRewriter &rewriter) const { + auto structType = typeConverter.convertType(memRefType); + auto memRefDescriptor = MemRefDescriptor::undef(rewriter, loc, structType); + + // Field 1: Allocated pointer, used for malloc/free. + memRefDescriptor.setAllocatedPtr(rewriter, loc, allocatedPtr); + + // Field 2: Actual aligned pointer to payload. + memRefDescriptor.setAlignedPtr(rewriter, loc, alignedPtr); + + // Field 3: Offset in aligned pointer. + memRefDescriptor.setOffset(rewriter, loc, + createIndexConstant(rewriter, loc, offset)); + + if (memRefType.getRank() == 0) + // No size/stride descriptor in memref, return the descriptor value. + return memRefDescriptor; + + // Fields 4 and 5: sizes and strides of the strided MemRef. + // Store all sizes in the descriptor. Only dynamic sizes are passed in as + // operands to AllocOp. 
+  Value runningStride = nullptr;
+  // Iterate strides in reverse order, compute runningStride and strideValues.
+  auto nStrides = strides.size();
+  SmallVector<Value, 4> strideValues(nStrides, nullptr);
+  for (unsigned i = 0; i < nStrides; ++i) {
+    int64_t index = nStrides - 1 - i;
+    if (strides[index] == MemRefType::getDynamicStrideOrOffset())
+      // Identity layout map is enforced in the match function, so we compute:
+      //   `runningStride *= sizes[index + 1]`
+      runningStride = runningStride ? rewriter.create<LLVM::MulOp>(
+                                          loc, runningStride, sizes[index + 1])
+                                    : createIndexConstant(rewriter, loc, 1);
+    else
+      runningStride = createIndexConstant(rewriter, loc, strides[index]);
+    strideValues[index] = runningStride;
+  }
+  // Fill size and stride descriptors in memref.
+  for (auto indexedSize : llvm::enumerate(sizes)) {
+    int64_t index = indexedSize.index();
+    memRefDescriptor.setSize(rewriter, loc, index, indexedSize.value());
+    memRefDescriptor.setStride(rewriter, loc, index, strideValues[index]);
+  }
+  return memRefDescriptor;
+}
+
 /// Only retain those attributes that are not constructed by
 /// `LLVMFuncOp::build`. If `filterArgAttrs` is set, also filter out argument
 /// attributes.
@@ -1710,251 +1767,84 @@ static bool isSupportedMemRefType(MemRefType type) {
 }

 /// Lowering for AllocOp and AllocaOp.
-template <typename AllocLikeOp>
-struct AllocLikeOpLowering : public ConvertOpToLLVMPattern<AllocLikeOp> {
-  using ConvertOpToLLVMPattern<AllocLikeOp>::createIndexConstant;
-  using ConvertOpToLLVMPattern<AllocLikeOp>::getIndexType;
-  using ConvertOpToLLVMPattern<AllocLikeOp>::typeConverter;
-  using ConvertOpToLLVMPattern<AllocLikeOp>::getVoidPtrType;
+struct AllocLikeOpLowering : public ConvertToLLVMPattern {
+  using ConvertToLLVMPattern::createIndexConstant;
+  using ConvertToLLVMPattern::getIndexType;
+  using ConvertToLLVMPattern::getVoidPtrType;
+  using ConvertToLLVMPattern::typeConverter;
+
+  explicit AllocLikeOpLowering(StringRef opName, LLVMTypeConverter &converter)
+      : ConvertToLLVMPattern(opName, &converter.getContext(), converter) {}
+
+protected:
+  // Returns 'input' aligned up to 'alignment'. Computes
+  //   bumped  = input + alignment - 1
+  //   aligned = bumped - bumped % alignment
+  static Value createAligned(ConversionPatternRewriter &rewriter, Location loc,
+                             Value input, Value alignment) {
+    Value one = createIndexAttrConstant(rewriter, loc, alignment.getType(), 1);
+    Value bump = rewriter.create<LLVM::SubOp>(loc, alignment, one);
+    Value bumped = rewriter.create<LLVM::AddOp>(loc, input, bump);
+    Value mod = rewriter.create<LLVM::URemOp>(loc, bumped, alignment);
+    return rewriter.create<LLVM::SubOp>(loc, bumped, mod);
+  }
+
+  // Creates a call to an allocation function with params and casts the
+  // resulting void pointer to ptrType.
+  Value createAllocCall(Location loc, StringRef name, Type ptrType,
+                        ArrayRef<Value> params, ModuleOp module,
+                        ConversionPatternRewriter &rewriter) const {
+    SmallVector<LLVM::LLVMType, 2> paramTypes;
+    auto allocFuncOp = module.lookupSymbol<LLVM::LLVMFuncOp>(name);
+    if (!allocFuncOp) {
+      for (Value param : params)
+        paramTypes.push_back(param.getType().cast<LLVM::LLVMType>());
+      auto allocFuncType =
+          LLVM::LLVMType::getFunctionTy(getVoidPtrType(), paramTypes,
+                                        /*isVarArg=*/false);
+      OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPointToStart(module.getBody());
+      allocFuncOp = rewriter.create<LLVM::LLVMFuncOp>(rewriter.getUnknownLoc(),
+                                                      name, allocFuncType);
+    }
+    auto allocFuncSymbol = rewriter.getSymbolRefAttr(allocFuncOp);
+    auto allocatedPtr = rewriter
+                            .create<LLVM::CallOp>(loc, getVoidPtrType(),
+                                                  allocFuncSymbol, params)
+                            .getResult(0);
+    return rewriter.create<LLVM::BitcastOp>(loc, ptrType, allocatedPtr);
+  }

-  explicit AllocLikeOpLowering(LLVMTypeConverter &converter)
-      : ConvertOpToLLVMPattern<AllocLikeOp>(converter) {}
+  /// Allocates the underlying buffer. Returns the allocated pointer and the
+  /// aligned pointer.
+  virtual std::tuple<Value, Value>
+  allocateBuffer(ConversionPatternRewriter &rewriter, Location loc,
+                 Value cumulativeSize, Operation *op) const = 0;
+
+private:
+  static MemRefType getMemRefResultType(Operation *op) {
+    return op->getResult(0).getType().cast<MemRefType>();
+  }

   LogicalResult match(Operation *op) const override {
-    MemRefType memRefType = cast<AllocLikeOp>(op).getType();
+    MemRefType memRefType = getMemRefResultType(op);
     if (isSupportedMemRefType(memRefType))
       return success();

     int64_t offset;
     SmallVector<int64_t, 4> strides;
-    auto successStrides = getStridesAndOffset(memRefType, strides, offset);
-    if (failed(successStrides))
+    if (failed(getStridesAndOffset(memRefType, strides, offset)))
       return failure();

     // Dynamic strides are ok if they can be deduced from dynamic sizes (which
-    // is guaranteed when succeeded(successStrides)). Dynamic offset however can
-    // never be alloc'ed.
+    // is guaranteed when getStridesAndOffset succeeded). Dynamic offset however
+    // can never be alloc'ed.
     if (offset == MemRefType::getDynamicStrideOrOffset())
       return failure();

     return success();
   }

-  // Returns bump = (alignment - (input % alignment))% alignment, which is the
-  // increment necessary to align `input` to `alignment` boundary.
-  // TODO: this can be made more efficient by just using a single addition
-  // and two bit shifts: (ptr + align - 1)/align, align is always power of 2.
-  Value createBumpToAlign(Location loc, OpBuilder b, Value input,
-                          Value alignment) const {
-    Value modAlign = b.create<LLVM::URemOp>(loc, input, alignment);
-    Value diff = b.create<LLVM::SubOp>(loc, alignment, modAlign);
-    Value shift = b.create<LLVM::URemOp>(loc, diff, alignment);
-    return shift;
-  }
-
-  /// Creates and populates the memref descriptor struct given all its fields.
-  /// This method also performs any post allocation alignment needed for heap
-  /// allocations when `accessAlignment` is non null. This is used with
-  /// allocators that do not support alignment.
-  MemRefDescriptor createMemRefDescriptor(
-      Location loc, ConversionPatternRewriter &rewriter, MemRefType memRefType,
-      Value allocatedTypePtr, Value allocatedBytePtr, Value accessAlignment,
-      uint64_t offset, ArrayRef<int64_t> strides, ArrayRef<Value> sizes) const {
-    auto elementPtrType = this->getElementPtrType(memRefType);
-    auto structType = typeConverter.convertType(memRefType);
-    auto memRefDescriptor = MemRefDescriptor::undef(rewriter, loc, structType);
-
-    // Field 1: Allocated pointer, used for malloc/free.
- memRefDescriptor.setAllocatedPtr(rewriter, loc, allocatedTypePtr); - - // Field 2: Actual aligned pointer to payload. - Value alignedBytePtr = allocatedTypePtr; - if (accessAlignment) { - // offset = (align - (ptr % align))% align - Value intVal = rewriter.create( - loc, this->getIndexType(), allocatedBytePtr); - Value offset = createBumpToAlign(loc, rewriter, intVal, accessAlignment); - Value aligned = rewriter.create( - loc, allocatedBytePtr.getType(), allocatedBytePtr, offset); - alignedBytePtr = rewriter.create( - loc, elementPtrType, ArrayRef(aligned)); - } - memRefDescriptor.setAlignedPtr(rewriter, loc, alignedBytePtr); - - // Field 3: Offset in aligned pointer. - memRefDescriptor.setOffset(rewriter, loc, - createIndexConstant(rewriter, loc, offset)); - - if (memRefType.getRank() == 0) - // No size/stride descriptor in memref, return the descriptor value. - return memRefDescriptor; - - // Fields 4 and 5: sizes and strides of the strided MemRef. - // Store all sizes in the descriptor. Only dynamic sizes are passed in as - // operands to AllocOp. - Value runningStride = nullptr; - // Iterate strides in reverse order, compute runningStride and strideValues. - auto nStrides = strides.size(); - SmallVector strideValues(nStrides, nullptr); - for (unsigned i = 0; i < nStrides; ++i) { - int64_t index = nStrides - 1 - i; - if (strides[index] == MemRefType::getDynamicStrideOrOffset()) - // Identity layout map is enforced in the match function, so we compute: - // `runningStride *= sizes[index + 1]` - runningStride = runningStride - ? rewriter.create(loc, runningStride, - sizes[index + 1]) - : createIndexConstant(rewriter, loc, 1); - else - runningStride = createIndexConstant(rewriter, loc, strides[index]); - strideValues[index] = runningStride; - } - // Fill size and stride descriptors in memref. - for (auto indexedSize : llvm::enumerate(sizes)) { - int64_t index = indexedSize.index(); - memRefDescriptor.setSize(rewriter, loc, index, indexedSize.value()); - memRefDescriptor.setStride(rewriter, loc, index, strideValues[index]); - } - return memRefDescriptor; - } - - /// Returns the memref's element size in bytes. - // TODO: there are other places where this is used. Expose publicly? - static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) { - auto elementType = memRefType.getElementType(); - - unsigned sizeInBits; - if (elementType.isIntOrFloat()) { - sizeInBits = elementType.getIntOrFloatBitWidth(); - } else { - auto vectorType = elementType.cast(); - sizeInBits = - vectorType.getElementTypeBitWidth() * vectorType.getNumElements(); - } - return llvm::divideCeil(sizeInBits, 8); - } - - /// Returns the alignment to be used for the allocation call itself. - /// aligned_alloc requires the allocation size to be a power of two, and the - /// allocation size to be a multiple of alignment, - Optional getAllocationAlignment(AllocOp allocOp) const { - // No alignment can be used for the 'malloc' call itself. - if (!typeConverter.getOptions().useAlignedAlloc) - return None; - - if (Optional alignment = allocOp.alignment()) - return *alignment; - - // Whenever we don't have alignment set, we will use an alignment - // consistent with the element type; since the allocation size has to be a - // power of two, we will bump to the next power of two if it already isn't. 
- auto eltSizeBytes = getMemRefEltSizeInBytes(allocOp.getType()); - return std::max(kMinAlignedAllocAlignment, - llvm::PowerOf2Ceil(eltSizeBytes)); - } - - /// Returns true if the memref size in bytes is known to be a multiple of - /// factor. - static bool isMemRefSizeMultipleOf(MemRefType type, uint64_t factor) { - uint64_t sizeDivisor = getMemRefEltSizeInBytes(type); - for (unsigned i = 0, e = type.getRank(); i < e; i++) { - if (type.isDynamic(type.getDimSize(i))) - continue; - sizeDivisor = sizeDivisor * type.getDimSize(i); - } - return sizeDivisor % factor == 0; - } - - /// Allocates the underlying buffer using the right call. `allocatedBytePtr` - /// is set to null for stack allocations. `accessAlignment` is set if - /// alignment is needed post allocation (for eg. in conjunction with malloc). - Value allocateBuffer(Location loc, Value cumulativeSize, Operation *op, - MemRefType memRefType, Value one, Value &accessAlignment, - Value &allocatedBytePtr, - ConversionPatternRewriter &rewriter) const { - auto elementPtrType = this->getElementPtrType(memRefType); - - // With alloca, one gets a pointer to the element type right away. - // For stack allocations. - if (auto allocaOp = dyn_cast(op)) { - allocatedBytePtr = nullptr; - accessAlignment = nullptr; - return rewriter.create( - loc, elementPtrType, cumulativeSize, - allocaOp.alignment() ? *allocaOp.alignment() : 0); - } - - // Heap allocations. - AllocOp allocOp = cast(op); - - Optional allocationAlignment = getAllocationAlignment(allocOp); - // Whether to use std lib function aligned_alloc that supports alignment. - bool useAlignedAlloc = allocationAlignment.hasValue(); - - // Insert the malloc/aligned_alloc declaration if it is not already present. - const auto *allocFuncName = useAlignedAlloc ? "aligned_alloc" : "malloc"; - auto module = allocOp.getParentOfType(); - auto allocFunc = module.lookupSymbol(allocFuncName); - if (!allocFunc) { - OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart( - op->getParentOfType().getBody()); - SmallVector callArgTypes = {getIndexType()}; - // aligned_alloc(size_t alignment, size_t size) - if (useAlignedAlloc) - callArgTypes.push_back(getIndexType()); - allocFunc = rewriter.create( - rewriter.getUnknownLoc(), allocFuncName, - LLVM::LLVMType::getFunctionTy(getVoidPtrType(), callArgTypes, - /*isVarArg=*/false)); - } - - // Allocate the underlying buffer and store a pointer to it in the MemRef - // descriptor. - SmallVector callArgs; - if (useAlignedAlloc) { - // Use aligned_alloc. - assert(allocationAlignment && "allocation alignment should be present"); - auto alignedAllocAlignmentValue = rewriter.create( - loc, typeConverter.convertType(rewriter.getIntegerType(64)), - rewriter.getI64IntegerAttr(allocationAlignment.getValue())); - // aligned_alloc requires size to be a multiple of alignment; we will pad - // the size to the next multiple if necessary. - if (!isMemRefSizeMultipleOf(memRefType, allocationAlignment.getValue())) { - Value bump = createBumpToAlign(loc, rewriter, cumulativeSize, - alignedAllocAlignmentValue); - cumulativeSize = - rewriter.create(loc, cumulativeSize, bump); - } - callArgs = {alignedAllocAlignmentValue, cumulativeSize}; - } else { - // Adjust the allocation size to consider alignment. 
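// Illustrative sketch (editorial, not part of the patch) of the
// aligned_alloc contract the code above relies on: the requested size must
// be a multiple of the alignment, which is why the size gets padded first.
// The values 32 and 416 (13 * 32) are arbitrary examples.
#include <cstdlib>
void alignedAllocExample() {
  void *p = std::aligned_alloc(/*alignment=*/32, /*size=*/416);
  std::free(p);
}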
-    if (Optional<uint64_t> alignment = allocOp.alignment()) {
-      accessAlignment = createIndexConstant(rewriter, loc, *alignment);
-    } else if (!memRefType.getElementType().isSignlessIntOrIndexOrFloat()) {
-      // In the case where no alignment is specified, we may want to override
-      // `malloc's` behavior. `malloc` typically aligns at the size of the
-      // biggest scalar on a target HW. For non-scalars, use the natural
-      // alignment of the LLVM type given by the LLVM DataLayout.
-      accessAlignment =
-          this->getSizeInBytes(loc, memRefType.getElementType(), rewriter);
-    }
-    if (accessAlignment)
-      cumulativeSize =
-          rewriter.create<LLVM::AddOp>(loc, cumulativeSize, accessAlignment);
-    callArgs.push_back(cumulativeSize);
-  }
-  auto allocFuncSymbol = rewriter.getSymbolRefAttr(allocFunc);
-  allocatedBytePtr = rewriter
-                         .create<LLVM::CallOp>(loc, getVoidPtrType(),
-                                               allocFuncSymbol, callArgs)
-                         .getResult(0);
-  // For heap allocations, the allocated pointer is a cast of the byte pointer
-  // to the type pointer.
-  return rewriter.create<LLVM::BitcastOp>(loc, elementPtrType,
-                                          allocatedBytePtr);
-  }
-
 // An `alloc` is converted into a definition of a memref descriptor value and
 // a call to `malloc` to allocate the underlying data buffer. The memref
 // descriptor is of the LLVM structure type where:
@@ -1964,15 +1854,16 @@ struct AllocLikeOpLowering : public ConvertOpToLLVMPattern<AllocLikeOp> {
 // 3. the remaining elements serve to store all the sizes and strides of the
 // memref using LLVM-converted `index` type.
 //
-// Alignment is performed by allocating `alignment - 1` more bytes than
+// Alignment is performed by allocating `alignment` more bytes than
 // requested and shifting the aligned pointer relative to the allocated
-// memory. If alignment is unspecified, the two pointers are equal.
+// memory. Note: `alignment - <minimum malloc alignment>` would actually be
+// sufficient. If alignment is unspecified, the two pointers are equal.

 // An `alloca` is converted into a definition of a memref descriptor value and
 // an llvm.alloca to allocate the underlying data buffer.

 void rewrite(Operation *op, ArrayRef<Value> operands,
              ConversionPatternRewriter &rewriter) const override {
-  MemRefType memRefType = cast<AllocLikeOp>(op).getType();
+  MemRefType memRefType = getMemRefResultType(op);
   auto loc = op->getLoc();

   // Get actual sizes of the memref as values: static sizes are constant
@@ -1983,17 +1874,12 @@ struct AllocLikeOpLowering : public ConvertOpToLLVMPattern<AllocLikeOp> {
   Value cumulativeSize = this->getCumulativeSizeInBytes(
       loc, memRefType.getElementType(), sizes, rewriter);
+  // Allocate the underlying buffer.
-  // Value holding the alignment that has to be performed post allocation
-  // (in conjunction with allocators that do not support alignment, eg.
-  // malloc); nullptr if no such adjustment needs to be performed.
-  Value accessAlignment;
-  // Byte pointer to the allocated buffer.
-  Value allocatedBytePtr;
-  Value allocatedTypePtr =
-      allocateBuffer(loc, cumulativeSize, op, memRefType,
-                     createIndexConstant(rewriter, loc, 1), accessAlignment,
-                     allocatedBytePtr, rewriter);
+  Value allocatedPtr;
+  Value alignedPtr;
+  std::tie(allocatedPtr, alignedPtr) =
+      this->allocateBuffer(rewriter, loc, cumulativeSize, op);

   int64_t offset;
   SmallVector<int64_t, 4> strides;
@@ -2010,25 +1896,163 @@ struct AllocLikeOpLowering : public ConvertOpToLLVMPattern<AllocLikeOp> {
              "unexpected number of strides");

   // Create the MemRef descriptor.
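// Illustrative sketch (editorial, not part of the patch) of the realignment
// scheme described in the comments above and implemented by createAligned in
// the new lowering: bump the address by alignment - 1, then round down to a
// multiple of the alignment.
#include <cstdint>
static uint64_t alignUp(uint64_t addr, uint64_t alignment) {
  uint64_t bumped = addr + alignment - 1;
  return bumped - bumped % alignment;
}
// e.g. alignUp(0x1005, 8) == 0x1008 and alignUp(0x1008, 8) == 0x1008, so at
// most alignment - 1 of the extra bytes are ever consumed.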
- auto memRefDescriptor = createMemRefDescriptor( - loc, rewriter, memRefType, allocatedTypePtr, allocatedBytePtr, - accessAlignment, offset, strides, sizes); + auto memRefDescriptor = + this->createMemRefDescriptor(loc, memRefType, allocatedPtr, alignedPtr, + offset, strides, sizes, rewriter); // Return the final value of the descriptor. rewriter.replaceOp(op, {memRefDescriptor}); } +}; -protected: - /// The minimum alignment to use with aligned_alloc (has to be a power of 2). - uint64_t kMinAlignedAllocAlignment = 16UL; +struct AllocOpLowering : public AllocLikeOpLowering { + AllocOpLowering(LLVMTypeConverter &converter) + : AllocLikeOpLowering(AllocOp::getOperationName(), converter) {} + + std::tuple allocateBuffer(ConversionPatternRewriter &rewriter, + Location loc, Value cumulativeSize, + Operation *op) const override { + // Heap allocations. + AllocOp allocOp = cast(op); + MemRefType memRefType = allocOp.getType(); + + Value alignment; + if (auto alignmentAttr = allocOp.alignment()) { + alignment = createIndexConstant(rewriter, loc, *alignmentAttr); + } else if (!memRefType.getElementType().isSignlessIntOrIndexOrFloat()) { + // In the case where no alignment is specified, we may want to override + // `malloc's` behavior. `malloc` typically aligns at the size of the + // biggest scalar on a target HW. For non-scalars, use the natural + // alignment of the LLVM type given by the LLVM DataLayout. + alignment = getSizeInBytes(loc, memRefType.getElementType(), rewriter); + } + + if (alignment) { + // Adjust the allocation size to consider alignment. + cumulativeSize = + rewriter.create(loc, cumulativeSize, alignment); + } + + // Allocate the underlying buffer and store a pointer to it in the MemRef + // descriptor. + Type elementPtrType = this->getElementPtrType(memRefType); + Value allocatedPtr = + createAllocCall(loc, "malloc", elementPtrType, {cumulativeSize}, + allocOp.getParentOfType(), rewriter); + + Value alignedPtr = allocatedPtr; + if (alignment) { + auto intPtrType = getIntPtrType(memRefType.getMemorySpace()); + // Compute the aligned type pointer. + Value allocatedInt = + rewriter.create(loc, intPtrType, allocatedPtr); + Value alignmentInt = + createAligned(rewriter, loc, allocatedInt, alignment); + alignedPtr = + rewriter.create(loc, elementPtrType, alignmentInt); + } + + return std::make_tuple(allocatedPtr, alignedPtr); + } }; -struct AllocOpLowering : public AllocLikeOpLowering { - explicit AllocOpLowering(LLVMTypeConverter &converter) - : AllocLikeOpLowering(converter) {} +struct AlignedAllocOpLowering : public AllocLikeOpLowering { + AlignedAllocOpLowering(LLVMTypeConverter &converter) + : AllocLikeOpLowering(AllocOp::getOperationName(), converter) {} + + /// Returns the memref's element size in bytes. + // TODO: there are other places where this is used. Expose publicly? + static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) { + auto elementType = memRefType.getElementType(); + + unsigned sizeInBits; + if (elementType.isIntOrFloat()) { + sizeInBits = elementType.getIntOrFloatBitWidth(); + } else { + auto vectorType = elementType.cast(); + sizeInBits = + vectorType.getElementTypeBitWidth() * vectorType.getNumElements(); + } + return llvm::divideCeil(sizeInBits, 8); + } + + /// Returns true if the memref size in bytes is known to be a multiple of + /// factor. 
+ static bool isMemRefSizeMultipleOf(MemRefType type, uint64_t factor) { + uint64_t sizeDivisor = getMemRefEltSizeInBytes(type); + for (unsigned i = 0, e = type.getRank(); i < e; i++) { + if (type.isDynamic(type.getDimSize(i))) + continue; + sizeDivisor = sizeDivisor * type.getDimSize(i); + } + return sizeDivisor % factor == 0; + } + + /// Returns the alignment to be used for the allocation call itself. + /// aligned_alloc requires the allocation size to be a power of two, and the + /// allocation size to be a multiple of alignment, + int64_t getAllocationAlignment(AllocOp allocOp) const { + if (Optional alignment = allocOp.alignment()) + return *alignment; + + // Whenever we don't have alignment set, we will use an alignment + // consistent with the element type; since the allocation size has to be a + // power of two, we will bump to the next power of two if it already isn't. + auto eltSizeBytes = getMemRefEltSizeInBytes(allocOp.getType()); + return std::max(kMinAlignedAllocAlignment, + llvm::PowerOf2Ceil(eltSizeBytes)); + } + + std::tuple allocateBuffer(ConversionPatternRewriter &rewriter, + Location loc, Value cumulativeSize, + Operation *op) const override { + // Heap allocations. + AllocOp allocOp = cast(op); + MemRefType memRefType = allocOp.getType(); + int64_t alignment = getAllocationAlignment(allocOp); + Value allocAlignment = createIndexConstant(rewriter, loc, alignment); + + // aligned_alloc requires size to be a multiple of alignment; we will pad + // the size to the next multiple if necessary. + if (!isMemRefSizeMultipleOf(memRefType, alignment)) + cumulativeSize = + createAligned(rewriter, loc, cumulativeSize, allocAlignment); + + Type elementPtrType = this->getElementPtrType(memRefType); + Value allocatedPtr = createAllocCall( + loc, "aligned_alloc", elementPtrType, {allocAlignment, cumulativeSize}, + allocOp.getParentOfType(), rewriter); + + return std::make_tuple(allocatedPtr, allocatedPtr); + } + + /// The minimum alignment to use with aligned_alloc (has to be a power of 2). + static constexpr uint64_t kMinAlignedAllocAlignment = 16UL; }; -using AllocaOpLowering = AllocLikeOpLowering; +struct AllocaOpLowering : public AllocLikeOpLowering { + AllocaOpLowering(LLVMTypeConverter &converter) + : AllocLikeOpLowering(AllocaOp::getOperationName(), converter) {} + + /// Allocates the underlying buffer using the right call. `allocatedBytePtr` + /// is set to null for stack allocations. `accessAlignment` is set if + /// alignment is needed post allocation (for eg. in conjunction with malloc). + std::tuple allocateBuffer(ConversionPatternRewriter &rewriter, + Location loc, Value cumulativeSize, + Operation *op) const override { + + // With alloca, one gets a pointer to the element type right away. + // For stack allocations. + auto allocaOp = cast(op); + auto elementPtrType = this->getElementPtrType(allocaOp.getType()); + + auto allocatedElementPtr = rewriter.create( + loc, elementPtrType, cumulativeSize, + allocaOp.alignment() ? *allocaOp.alignment() : 0); + + return std::make_tuple(allocatedElementPtr, allocatedElementPtr); + } +}; /// Copies the shaped descriptor part to (if `toDynamic` is set) or from /// (otherwise) the dynamically allocated memory for any operands that were @@ -3200,12 +3224,13 @@ struct AssumeAlignmentOpLowering // This relies on LLVM's CSE optimization (potentially after SROA), since // after CSE all memref.alignedPtr instances get de-duplicated into the same // pointer SSA value. 
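// Illustrative sketch (editorial, not part of the patch) of the predicate
// materialized below for the alignment assumption: a pointer is
// `alignment`-aligned iff the low bits selected by the mask alignment - 1
// are all zero (alignment being a power of two).
#include <cstdint>
static bool isAligned(uint64_t ptrValue, uint64_t alignment) {
  return (ptrValue & (alignment - 1)) == 0;
}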
- Value zero = - createIndexAttrConstant(rewriter, op->getLoc(), getIndexType(), 0); - Value mask = createIndexAttrConstant(rewriter, op->getLoc(), getIndexType(), + auto intPtrType = + getIntPtrType(memRefDescriptor.getElementPtrType().getAddressSpace()); + Value zero = createIndexAttrConstant(rewriter, op->getLoc(), intPtrType, 0); + Value mask = createIndexAttrConstant(rewriter, op->getLoc(), intPtrType, alignment - 1); Value ptrValue = - rewriter.create(op->getLoc(), getIndexType(), ptr); + rewriter.create(op->getLoc(), intPtrType, ptr); rewriter.create( op->getLoc(), rewriter.create( @@ -3477,9 +3502,12 @@ void mlir::populateStdToLLVMMemoryConversionPatterns( StoreOpLowering, SubViewOpLowering, TransposeOpLowering, - ViewOpLowering, - AllocOpLowering>(converter); + ViewOpLowering>(converter); // clang-format on + if (converter.getOptions().useAlignedAlloc) + patterns.insert(converter); + else + patterns.insert(converter); } void mlir::populateStdToLLVMFuncOpConversionPattern( diff --git a/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir index 4e3edd4c7c15d..8e7b22574432d 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir @@ -36,7 +36,6 @@ func @mixed_alloc(%arg0: index, %arg1: index) -> memref { // CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[sz_bytes:.*]] = llvm.mul %[[sz]], %[[sizeof]] : !llvm.i64 -// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (!llvm.i64) -> !llvm.ptr // CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> @@ -77,7 +76,6 @@ func @dynamic_alloc(%arg0: index, %arg1: index) -> memref { // CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[sz_bytes:.*]] = llvm.mul %[[sz]], %[[sizeof]] : !llvm.i64 -// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (!llvm.i64) -> !llvm.ptr // CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -107,7 +105,6 @@ func @dynamic_alloca(%arg0: index, %arg1: index) -> memref { // CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[sz_bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64 -// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[sz_bytes]] x !llvm.float : (!llvm.i64) -> !llvm.ptr // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[allocated]], %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -153,8 +150,7 @@ func @stdlib_aligned_alloc(%N : index) -> memref<32x18xf32> { // ALIGNED-ALLOC-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> 
!llvm.ptr // ALIGNED-ALLOC-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // ALIGNED-ALLOC-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64 -// ALIGNED-ALLOC-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// ALIGNED-ALLOC-NEXT: %[[alignment:.*]] = llvm.mlir.constant(32 : i64) : !llvm.i64 +// ALIGNED-ALLOC-NEXT: %[[alignment:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 // ALIGNED-ALLOC-NEXT: %[[allocated:.*]] = llvm.call @aligned_alloc(%[[alignment]], %[[bytes]]) : (!llvm.i64, !llvm.i64) -> !llvm.ptr // ALIGNED-ALLOC-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr %0 = alloc() {alignment = 32} : memref<32x18xf32> @@ -164,26 +160,27 @@ func @stdlib_aligned_alloc(%N : index) -> memref<32x18xf32> { %1 = alloc() {alignment = 64} : memref<4096xf32> // Alignment is to element type boundaries (minimum 16 bytes). - // ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : i64) : !llvm.i64 + // ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c32]] %2 = alloc() : memref<4096xvector<8xf32>> // The minimum alignment is 16 bytes unless explicitly specified. - // ALIGNED-ALLOC: %[[c16:.*]] = llvm.mlir.constant(16 : i64) : !llvm.i64 + // ALIGNED-ALLOC: %[[c16:.*]] = llvm.mlir.constant(16 : index) : !llvm.i64 // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c16]], %3 = alloc() : memref<4096xvector<2xf32>> - // ALIGNED-ALLOC: %[[c8:.*]] = llvm.mlir.constant(8 : i64) : !llvm.i64 + // ALIGNED-ALLOC: %[[c8:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64 // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c8]], %4 = alloc() {alignment = 8} : memref<1024xvector<4xf32>> // Bump the memref allocation size if its size is not a multiple of alignment. - // ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : i64) : !llvm.i64 - // ALIGNED-ALLOC-NEXT: llvm.urem + // ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 + // ALIGNED-ALLOC-NEXT: llvm.mlir.constant(1 : index) : !llvm.i64 // ALIGNED-ALLOC-NEXT: llvm.sub + // ALIGNED-ALLOC-NEXT: llvm.add // ALIGNED-ALLOC-NEXT: llvm.urem - // ALIGNED-ALLOC-NEXT: %[[SIZE_ALIGNED:.*]] = llvm.add + // ALIGNED-ALLOC-NEXT: %[[SIZE_ALIGNED:.*]] = llvm.sub // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c32]], %[[SIZE_ALIGNED]]) %5 = alloc() {alignment = 32} : memref<100xf32> // Bump alignment to the next power of two if it isn't. 
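// Editorial worked example (not checked by the test) for the size bump in
// %5 above: memref<100xf32> occupies 100 * 4 = 400 bytes, which is not a
// multiple of the 32-byte alignment; rounding up via
// (400 + 31) - ((400 + 31) mod 32) = 431 - 15 yields the padded size 416
// that is passed to aligned_alloc.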
- // ALIGNED-ALLOC: %[[c128:.*]] = llvm.mlir.constant(128 : i64) : !llvm.i64 + // ALIGNED-ALLOC: %[[c128:.*]] = llvm.mlir.constant(128 : index) : !llvm.i64 // ALIGNED-ALLOC: llvm.call @aligned_alloc(%[[c128]] %6 = alloc(%N) : memref> return %0 : memref<32x18xf32> diff --git a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir index b93446f00d2ee..d9d93b7823b82 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir @@ -76,7 +76,6 @@ func @zero_d_alloc() -> memref { // CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 -// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: llvm.call @malloc(%{{.*}}) : (!llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)> @@ -91,7 +90,6 @@ func @zero_d_alloc() -> memref { // BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // BAREPTR-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 -// BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // BAREPTR-NEXT: llvm.call @malloc(%{{.*}}) : (!llvm.i64) -> !llvm.ptr // BAREPTR-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr // BAREPTR-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)> @@ -130,19 +128,19 @@ func @aligned_1d_alloc() -> memref<42xf32> { // CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 -// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64 // CHECK-NEXT: %[[allocsize:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 // CHECK-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[ptr]] : !llvm.ptr to !llvm.i64 +// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK-NEXT: %[[bump:.*]] = llvm.sub %[[alignment]], %[[one_1]] : !llvm.i64 +// CHECK-NEXT: %[[bumped:.*]] = llvm.add %[[allocatedAsInt]], %[[bump]] : !llvm.i64 +// CHECK-NEXT: %[[mod:.*]] = llvm.urem %[[bumped]], %[[alignment]] : !llvm.i64 +// CHECK-NEXT: %[[aligned:.*]] = llvm.sub %[[bumped]], %[[mod]] : !llvm.i64 +// CHECK-NEXT: %[[alignedBitCast:.*]] = llvm.inttoptr %[[aligned]] : !llvm.i64 to !llvm.ptr // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[allocated]] : !llvm.ptr to !llvm.i64 -// CHECK-NEXT: %[[alignAdj1:.*]] = llvm.urem %[[allocatedAsInt]], %[[alignment]] : !llvm.i64 -// CHECK-NEXT: %[[alignAdj2:.*]] = llvm.sub %[[alignment]], %[[alignAdj1]] : !llvm.i64 -// 
CHECK-NEXT: %[[alignAdj3:.*]] = llvm.urem %[[alignAdj2]], %[[alignment]] : !llvm.i64 -// CHECK-NEXT: %[[aligned:.*]] = llvm.getelementptr %[[allocated]][%[[alignAdj3]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr -// CHECK-NEXT: %[[alignedBitCast:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr to !llvm.ptr // CHECK-NEXT: llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> @@ -153,19 +151,19 @@ func @aligned_1d_alloc() -> memref<42xf32> { // BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // BAREPTR-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 -// BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // BAREPTR-NEXT: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64 // BAREPTR-NEXT: %[[allocsize:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 // BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm.ptr // BAREPTR-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +// BAREPTR-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[ptr]] : !llvm.ptr to !llvm.i64 +// BAREPTR-NEXT: %[[one_2:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 +// BAREPTR-NEXT: %[[bump:.*]] = llvm.sub %[[alignment]], %[[one_2]] : !llvm.i64 +// BAREPTR-NEXT: %[[bumped:.*]] = llvm.add %[[allocatedAsInt]], %[[bump]] : !llvm.i64 +// BAREPTR-NEXT: %[[mod:.*]] = llvm.urem %[[bumped]], %[[alignment]] : !llvm.i64 +// BAREPTR-NEXT: %[[aligned:.*]] = llvm.sub %[[bumped]], %[[mod]] : !llvm.i64 +// BAREPTR-NEXT: %[[alignedBitCast:.*]] = llvm.inttoptr %[[aligned]] : !llvm.i64 to !llvm.ptr // BAREPTR-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // BAREPTR-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// BAREPTR-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[allocated]] : !llvm.ptr to !llvm.i64 -// BAREPTR-NEXT: %[[alignAdj1:.*]] = llvm.urem %[[allocatedAsInt]], %[[alignment]] : !llvm.i64 -// BAREPTR-NEXT: %[[alignAdj2:.*]] = llvm.sub %[[alignment]], %[[alignAdj1]] : !llvm.i64 -// BAREPTR-NEXT: %[[alignAdj3:.*]] = llvm.urem %[[alignAdj2]], %[[alignment]] : !llvm.i64 -// BAREPTR-NEXT: %[[aligned:.*]] = llvm.getelementptr %[[allocated]][%[[alignAdj3]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr -// BAREPTR-NEXT: %[[alignedBitCast:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr to !llvm.ptr // BAREPTR-NEXT: llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // BAREPTR-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 // BAREPTR-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> @@ -186,7 +184,6 @@ func @static_alloc() -> memref<32x18xf32> { // CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64 -// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[bytes]]) : (!llvm.i64) -> !llvm.ptr 
// CHECK-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr @@ -198,7 +195,6 @@ func @static_alloc() -> memref<32x18xf32> { // BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // BAREPTR-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64 -// BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[bytes]]) : (!llvm.i64) -> !llvm.ptr // BAREPTR-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr %0 = alloc() : memref<32x18xf32> @@ -217,7 +213,6 @@ func @static_alloca() -> memref<32x18xf32> { // CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64 -// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[bytes]] x !llvm.float : (!llvm.i64) -> !llvm.ptr %0 = alloca() : memref<32x18xf32> From d5a6da84a3462d41be003f4e5d4d1d2e0364ce9d Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 5 Oct 2020 11:42:13 -0400 Subject: [PATCH 034/321] [libc++/abi] Revert "[libc++] Move the weak symbols list to libc++abi" This reverts commit c7d4aa711a. I am still investigating the issue, but it looks like that commit has an interaction with ld64 that causes new/delete weak re-exports not to work properly anymore. This is weird because this commit did not touch the exports of new/delete -- I am still investigating. --- libcxx/lib/weak.exp | 16 ++++++++++++++++ libcxx/src/CMakeLists.txt | 3 ++- libcxxabi/lib/weak.exp | 7 ------- libcxxabi/src/CMakeLists.txt | 2 -- 4 files changed, 18 insertions(+), 10 deletions(-) create mode 100644 libcxx/lib/weak.exp delete mode 100644 libcxxabi/lib/weak.exp diff --git a/libcxx/lib/weak.exp b/libcxx/lib/weak.exp new file mode 100644 index 0000000000000..6bdcc0578460d --- /dev/null +++ b/libcxx/lib/weak.exp @@ -0,0 +1,16 @@ +__ZTISt10bad_typeid +__ZTISt11logic_error +__ZTISt11range_error +__ZTISt12domain_error +__ZTISt12length_error +__ZTISt12out_of_range +__ZTISt13bad_exception +__ZTISt13runtime_error +__ZTISt14overflow_error +__ZTISt15underflow_error +__ZTISt16invalid_argument +__ZTISt16nested_exception +__ZTISt20bad_array_new_length +__ZTISt8bad_cast +__ZTISt9bad_alloc +__ZTISt9exception diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 98a374c2bf9f5..0e6819369ffa1 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -210,7 +210,8 @@ if (LIBCXX_ENABLE_SHARED) target_link_libraries(cxx_shared PRIVATE "-Wl,-unexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/libc++unexp.exp" "-Wl,-reexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/libc++abi.v${LIBCXX_LIBCPPABI_VERSION}.exp" - "-Wl,-force_symbols_not_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/notweak.exp") + "-Wl,-force_symbols_not_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/notweak.exp" + "-Wl,-force_symbols_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/weak.exp") if (NOT LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS) target_link_libraries(cxx_shared PRIVATE diff --git a/libcxxabi/lib/weak.exp b/libcxxabi/lib/weak.exp deleted file mode 100644 index 784ca45decc19..0000000000000 --- a/libcxxabi/lib/weak.exp +++ /dev/null @@ -1,7 +0,0 @@ -__ZTISt11range_error 
-__ZTISt12domain_error -__ZTISt12length_error -__ZTISt12out_of_range -__ZTISt14overflow_error -__ZTISt15underflow_error -__ZTISt16invalid_argument diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 58d04e6578e31..c57d6fa83aa0f 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -214,8 +214,6 @@ if (LIBCXXABI_ENABLE_SHARED) export_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/itanium-base.exp") - target_link_libraries(cxxabi_shared PRIVATE "-Wl,-force_symbols_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/weak.exp") - if (LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS) export_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/new-delete.exp") endif() From 0ac210e5801ac8f80305613aac3f9a89ba0cc83d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 5 Oct 2020 15:51:50 +0100 Subject: [PATCH 035/321] [X86] isTargetShuffleEquivalent - merge duplicate array accesses. NFCI. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 47aad8965e1c8..8638db813360f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10955,7 +10955,7 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef Mask, int ExpectedIdx = ExpectedMask[i]; if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx) continue; - if (0 <= Mask[i] && 0 <= ExpectedMask[i]) { + if (0 <= MaskIdx && 0 <= ExpectedIdx) { SDValue MaskV = MaskIdx < Size ? V1 : V2; SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2; MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size); From 2efd9fd699ed59df2927074f318edac99533e402 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 5 Oct 2020 16:50:07 +0100 Subject: [PATCH 036/321] [InstCombine] Add or(shl(v,and(x,bw-1)),lshr(v,bw-and(x,bw-1))) funnel shift tests If we know the shift amount is less than the bitwidth we should be able to convert this to a funnel shift --- llvm/test/Transforms/InstCombine/funnel.ll | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll index 9adb91b88d7ff..f8844519ee749 100644 --- a/llvm/test/Transforms/InstCombine/funnel.ll +++ b/llvm/test/Transforms/InstCombine/funnel.ll @@ -182,3 +182,58 @@ define <3 x i36> @fshl_v3i36_constant_nonsplat_undef0(<3 x i36> %x, <3 x i36> %y %r = or <3 x i36> %shl, %shr ret <3 x i36> %r } + +; Fold or(shl(x,a),lshr(y,bw-a)) -> fshl(x,y,a) iff a < bw + +define i64 @fshl_sub_mask(i64 %x, i64 %y, i64 %a) { +; CHECK-LABEL: @fshl_sub_mask( +; CHECK-NEXT: [[MASK:%.*]] = and i64 [[A:%.*]], 63 +; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[X:%.*]], [[MASK]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i64 64, [[MASK]] +; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[Y:%.*]], [[SUB]] +; CHECK-NEXT: [[R:%.*]] = or i64 [[SHL]], [[SHR]] +; CHECK-NEXT: ret i64 [[R]] +; + %mask = and i64 %a, 63 + %shl = shl i64 %x, %mask + %sub = sub nuw nsw i64 64, %mask + %shr = lshr i64 %y, %sub + %r = or i64 %shl, %shr + ret i64 %r +} + +; Fold or(lshr(v,a),shl(v,bw-a)) -> fshr(y,x,a) iff a < bw + +define i64 @fshr_sub_mask(i64 %x, i64 %y, i64 %a) { +; CHECK-LABEL: @fshr_sub_mask( +; CHECK-NEXT: [[MASK:%.*]] = and i64 [[A:%.*]], 63 +; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], [[MASK]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i64 64, [[MASK]] +; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[Y:%.*]], [[SUB]] +; CHECK-NEXT: [[R:%.*]] = or i64 [[SHL]], [[SHR]] +; 
CHECK-NEXT: ret i64 [[R]]
+;
+  %mask = and i64 %a, 63
+  %shr = lshr i64 %x, %mask
+  %sub = sub nuw nsw i64 64, %mask
+  %shl = shl i64 %y, %sub
+  %r = or i64 %shl, %shr
+  ret i64 %r
+}
+
+define <2 x i64> @fshr_sub_mask_vector(<2 x i64> %x, <2 x i64> %y, <2 x i64> %a) {
+; CHECK-LABEL: @fshr_sub_mask_vector(
+; CHECK-NEXT: [[MASK:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 63>
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i64> [[X:%.*]], [[MASK]]
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <2 x i64> <i64 64, i64 64>, [[MASK]]
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> [[Y:%.*]], [[SUB]]
+; CHECK-NEXT: [[R:%.*]] = or <2 x i64> [[SHL]], [[SHR]]
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+  %mask = and <2 x i64> %a, <i64 63, i64 63>
+  %shr = lshr <2 x i64> %x, %mask
+  %sub = sub nuw nsw <2 x i64> <i64 64, i64 64>, %mask
+  %shl = shl <2 x i64> %y, %sub
+  %r = or <2 x i64> %shl, %shr
+  ret <2 x i64> %r
+}

From 5ba084c42fdc80a51fc49c27f5f3ecf153953114 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 5 Oct 2020 17:21:54 +0100
Subject: [PATCH 037/321] [InstCombine] Extend 'shift with constants' vector
 tests

Added missing test coverage for
shl(add(and(lshr(x,c1),c2),y),c1) -> add(and(x,c2<<c1),shl(y,c1))

 define <16 x i8> @test_FoldShiftByConstant_CreateAnd(<16 x i8> %in0) {
   ret <16 x i8> %vshl_n
 }

-define i32 @bar(i32 %x, i32 %y) {
-; CHECK-LABEL: @bar(
+define i32 @lshr_add_shl(i32 %x, i32 %y) {
+; CHECK-LABEL: @lshr_add_shl(
 ; CHECK-NEXT: [[B1:%.*]] = shl i32 [[Y:%.*]], 4
 ; CHECK-NEXT: [[A2:%.*]] = add i32 [[B1]], [[X:%.*]]
 ; CHECK-NEXT: [[C:%.*]] = and i32 [[A2]], -16
@@ -46,8 +46,8 @@ define i32 @bar(i32 %x, i32 %y) {
   ret i32 %c
 }

-define <2 x i32> @bar_v2i32(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: @bar_v2i32(
+define <2 x i32> @lshr_add_shl_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @lshr_add_shl_v2i32(
 ; CHECK-NEXT: [[B1:%.*]] = shl <2 x i32> [[Y:%.*]],
 ; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[B1]], [[X:%.*]]
 ; CHECK-NEXT: [[C:%.*]] = and <2 x i32> [[A2]],
@@ -59,8 +59,93 @@ define <2 x i32> @bar_v2i32(<2 x i32> %x, <2 x i32> %y) {
   ret <2 x i32> %c
 }

-define i32 @foo(i32 %x, i32 %y) {
-; CHECK-LABEL: @foo(
+define <2 x i32> @lshr_add_shl_v2i32_undef(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @lshr_add_shl_v2i32_undef(
+; CHECK-NEXT: [[A:%.*]] = lshr <2 x i32> [[X:%.*]],
+; CHECK-NEXT: [[B:%.*]] = add <2 x i32> [[A]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = shl <2 x i32> [[B]],
+; CHECK-NEXT: ret <2 x i32> [[C]]
+;
+  %a = lshr <2 x i32> %x,
+  %b = add <2 x i32> %a, %y
+  %c = shl <2 x i32> %b,
+  ret <2 x i32> %c
+}
+
+define <2 x i32> @lshr_add_shl_v2i32_nonuniform(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @lshr_add_shl_v2i32_nonuniform(
+; CHECK-NEXT: [[A:%.*]] = lshr <2 x i32> [[X:%.*]],
+; CHECK-NEXT: [[B:%.*]] = add <2 x i32> [[A]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = shl <2 x i32> [[B]],
+; CHECK-NEXT: ret <2 x i32> [[C]]
+;
+  %a = lshr <2 x i32> %x,
+  %b = add <2 x i32> %a, %y
+  %c = shl <2 x i32> %b,
+  ret <2 x i32> %c
+}
+
+define i32 @lshr_add_and_shl(i32 %x, i32 %y) {
+; CHECK-LABEL: @lshr_add_and_shl(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[Y:%.*]], 5
+; CHECK-NEXT: [[X_MASK:%.*]] = and i32 [[X:%.*]], 4064
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_MASK]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+  %1 = lshr i32 %x, 5
+  %2 = and i32 %1, 127
+  %3 = add i32 %y, %2
+  %4 = shl i32 %3, 5
+  ret i32 %4
+}
+
+define <2 x i32> @lshr_add_and_shl_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @lshr_add_and_shl_v2i32(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]],
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]],
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32>
[[TMP2]], [[Y:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i32> [[TMP3]], +; CHECK-NEXT: ret <2 x i32> [[TMP4]] +; + %1 = lshr <2 x i32> %x, + %2 = and <2 x i32> %1, + %3 = add <2 x i32> %y, %2 + %4 = shl <2 x i32> %3, + ret <2 x i32> %4 +} + +define <2 x i32> @lshr_add_and_shl_v2i32_undef(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @lshr_add_and_shl_v2i32_undef( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i32> [[TMP3]], +; CHECK-NEXT: ret <2 x i32> [[TMP4]] +; + %1 = lshr <2 x i32> %x, + %2 = and <2 x i32> %1, + %3 = add <2 x i32> %y, %2 + %4 = shl <2 x i32> %3, + ret <2 x i32> %4 +} + +define <2 x i32> @lshr_add_and_shl_v2i32_nonuniform(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @lshr_add_and_shl_v2i32_nonuniform( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i32> [[TMP3]], +; CHECK-NEXT: ret <2 x i32> [[TMP4]] +; + %1 = lshr <2 x i32> %x, + %2 = and <2 x i32> %1, + %3 = add <2 x i32> %y, %2 + %4 = shl <2 x i32> %3, + ret <2 x i32> %4 +} + +define i32 @shl_add_and_lshr(i32 %x, i32 %y) { +; CHECK-LABEL: @shl_add_and_lshr( ; CHECK-NEXT: [[C1:%.*]] = shl i32 [[Y:%.*]], 4 ; CHECK-NEXT: [[X_MASK:%.*]] = and i32 [[X:%.*]], 128 ; CHECK-NEXT: [[D:%.*]] = add i32 [[X_MASK]], [[C1]] @@ -73,8 +158,8 @@ define i32 @foo(i32 %x, i32 %y) { ret i32 %d } -define <2 x i32> @foo_v2i32(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @foo_v2i32( +define <2 x i32> @shl_add_and_lshr_v2i32(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @shl_add_and_lshr_v2i32( ; CHECK-NEXT: [[A:%.*]] = lshr <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A]], ; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B]], [[Y:%.*]] @@ -88,3 +173,32 @@ define <2 x i32> @foo_v2i32(<2 x i32> %x, <2 x i32> %y) { ret <2 x i32> %d } +define <2 x i32> @shl_add_and_lshr_v2i32_undef(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @shl_add_and_lshr_v2i32_undef( +; CHECK-NEXT: [[A:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A]], +; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B]], [[Y:%.*]] +; CHECK-NEXT: [[D:%.*]] = shl <2 x i32> [[C]], +; CHECK-NEXT: ret <2 x i32> [[D]] +; + %a = lshr <2 x i32> %x, + %b = and <2 x i32> %a, + %c = add <2 x i32> %b, %y + %d = shl <2 x i32> %c, + ret <2 x i32> %d +} + +define <2 x i32> @shl_add_and_lshr_v2i32_nonuniform(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @shl_add_and_lshr_v2i32_nonuniform( +; CHECK-NEXT: [[A:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A]], +; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B]], [[Y:%.*]] +; CHECK-NEXT: [[D:%.*]] = shl <2 x i32> [[C]], +; CHECK-NEXT: ret <2 x i32> [[D]] +; + %a = lshr <2 x i32> %x, + %b = and <2 x i32> %a, + %c = add <2 x i32> %b, %y + %d = shl <2 x i32> %c, + ret <2 x i32> %d +} From 9642ded8ba64590aca2720e4c124368d8e98cc8d Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Mon, 5 Oct 2020 11:27:15 -0500 Subject: [PATCH 038/321] [SVE] Lower fixed length VECREDUCE_AND operation Differential Revision: https://reviews.llvm.org/D88707 --- .../Target/AArch64/AArch64ISelLowering.cpp | 16 +- .../AArch64/sve-fixed-length-log-reduce.ll | 374 ++++++++++++++++++ 2 files changed, 388 insertions(+), 2 deletions(-) create mode 100644 
llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f513dce73277b..d799e1d2e64c1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1114,6 +1114,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::UMAX, MVT::v2i64, Custom); setOperationAction(ISD::UMIN, MVT::v1i64, Custom); setOperationAction(ISD::UMIN, MVT::v2i64, Custom); + setOperationAction(ISD::VECREDUCE_AND, MVT::v8i8, Custom); + setOperationAction(ISD::VECREDUCE_AND, MVT::v16i8, Custom); + setOperationAction(ISD::VECREDUCE_AND, MVT::v4i16, Custom); + setOperationAction(ISD::VECREDUCE_AND, MVT::v8i16, Custom); + setOperationAction(ISD::VECREDUCE_AND, MVT::v2i32, Custom); + setOperationAction(ISD::VECREDUCE_AND, MVT::v4i32, Custom); + setOperationAction(ISD::VECREDUCE_AND, MVT::v2i64, Custom); setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom); setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom); setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom); @@ -1245,6 +1252,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::UMAX, VT, Custom); setOperationAction(ISD::UMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); + setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); @@ -3927,6 +3935,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_AND: case ISD::VECREDUCE_SMAX: case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: @@ -9714,12 +9723,15 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, // Try to lower fixed length reductions to SVE. 
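// Illustrative sketch (editorial, not part of the patch) of the operation
// being lowered here: VECREDUCE_AND computes a bitwise AND across all
// vector lanes, shown below on i8 lanes in plain C++.
#include <cstdint>
#include <vector>
static uint8_t vecreduceAnd(const std::vector<uint8_t> &lanes) {
  uint8_t acc = UINT8_MAX; // identity element for AND
  for (uint8_t lane : lanes)
    acc &= lane;
  return acc;
}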
EVT SrcVT = Src.getValueType(); - bool OverrideNEON = Op.getOpcode() != ISD::VECREDUCE_ADD && - SrcVT.getVectorElementType() == MVT::i64; + bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND || + (Op.getOpcode() != ISD::VECREDUCE_ADD && + SrcVT.getVectorElementType() == MVT::i64); if (useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) { switch (Op.getOpcode()) { case ISD::VECREDUCE_ADD: return LowerFixedLengthReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG); + case ISD::VECREDUCE_AND: + return LowerFixedLengthReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG); case ISD::VECREDUCE_SMAX: return LowerFixedLengthReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG); case ISD::VECREDUCE_SMIN: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll new file mode 100644 index 0000000000000..b95564cbc1e5e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll @@ -0,0 +1,374 @@ +; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE +; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_EQ_256 +; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK +; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048 + +target triple = "aarch64-unknown-linux-gnu" + +; Don't use SVE when its registers are no bigger than NEON. +; NO_SVE-NOT: ptrue + +; +; ANDV +; + +; No single instruction NEON ANDV support. Use SVE. 
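; Editorial aside: for eight i8 lanes, the reduction computed here is
;   (((l0 & l1) & (l2 & l3)) & ((l4 & l5) & (l6 & l7)))
; SVE's andv performs this in a single instruction, whereas NEON would need a
; ladder of pairwise steps, hence the SVE override even for small vectors.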
+define i8 @andv_v8i8(<8 x i8> %a) #0 { +; CHECK-LABEL: andv_v8i8: +; CHECK: ptrue [[PG:p[0-9]+]].b, vl8 +; CHECK: andv b[[REDUCE:[0-9]+]], [[PG]], z0.b +; CHECK: fmov w0, s[[REDUCE]] +; CHECK: ret + %res = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %a) + ret i8 %res +} + +; No single instruction NEON ANDV support. Use SVE. +define i8 @andv_v16i8(<16 x i8> %a) #0 { +; CHECK-LABEL: andv_v16i8: +; CHECK: ptrue [[PG:p[0-9]+]].b, vl16 +; CHECK: andv b[[REDUCE:[0-9]+]], [[PG]], z0.b +; CHECK: fmov w0, s[[REDUCE]] +; CHECK: ret + %res = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %a) + ret i8 %res +} + +define i8 @andv_v32i8(<32 x i8>* %a) #0 { +; CHECK-LABEL: andv_v32i8: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_GE_256-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: andv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b +; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_256-NEXT: ret + %op = load <32 x i8>, <32 x i8>* %a + %res = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %op) + ret i8 %res +} + +define i8 @andv_v64i8(<64 x i8>* %a) #0 { +; CHECK-LABEL: andv_v64i8: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64 +; VBITS_GE_512-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: andv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b +; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: andv b[[REDUCE:[0-9]+]], [[PG]], [[AND]].b +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret + + %op = load <64 x i8>, <64 x i8>* %a + %res = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> %op) + ret i8 %res +} + +define i8 @andv_v128i8(<128 x i8>* %a) #0 { +; CHECK-LABEL: andv_v128i8: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128 +; VBITS_GE_1024-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: andv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b +; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_1024-NEXT: ret + %op = load <128 x i8>, <128 x i8>* %a + %res = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> %op) + ret i8 %res +} + +define i8 @andv_v256i8(<256 x i8>* %a) #0 { +; CHECK-LABEL: andv_v256i8: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256 +; VBITS_GE_2048-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: andv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b +; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_2048-NEXT: ret + %op = load <256 x i8>, <256 x i8>* %a + %res = call i8 @llvm.experimental.vector.reduce.and.v256i8(<256 x i8> %op) + ret i8 %res +} + +; No single instruction NEON ANDV support. Use SVE. +define i16 @andv_v4i16(<4 x i16> %a) #0 { +; CHECK-LABEL: andv_v4i16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl4 +; CHECK: andv h[[REDUCE:[0-9]+]], [[PG]], z0.h +; CHECK: fmov w0, s[[REDUCE]] +; CHECK: ret + %res = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %a) + ret i16 %res +} + +; No single instruction NEON ANDV support. Use SVE. 
+define i16 @andv_v8i16(<8 x i16> %a) #0 { +; CHECK-LABEL: andv_v8i16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl8 +; CHECK: andv h[[REDUCE:[0-9]+]], [[PG]], z0.h +; CHECK: fmov w0, s[[REDUCE]] +; CHECK: ret + %res = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %a) + ret i16 %res +} + +define i16 @andv_v16i16(<16 x i16>* %a) #0 { +; CHECK-LABEL: andv_v16i16: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_GE_256-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: andv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_256-NEXT: ret + %op = load <16 x i16>, <16 x i16>* %a + %res = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %op) + ret i16 %res +} + +define i16 @andv_v32i16(<32 x i16>* %a) #0 { +; CHECK-LABEL: andv_v32i16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: andv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: andv h[[REDUCE:[0-9]+]], [[PG]], [[AND]].h +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret + %op = load <32 x i16>, <32 x i16>* %a + %res = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> %op) + ret i16 %res +} + +define i16 @andv_v64i16(<64 x i16>* %a) #0 { +; CHECK-LABEL: andv_v64i16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: andv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_1024-NEXT: ret + %op = load <64 x i16>, <64 x i16>* %a + %res = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> %op) + ret i16 %res +} + +define i16 @andv_v128i16(<128 x i16>* %a) #0 { +; CHECK-LABEL: andv_v128i16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: andv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_2048-NEXT: ret + %op = load <128 x i16>, <128 x i16>* %a + %res = call i16 @llvm.experimental.vector.reduce.and.v128i16(<128 x i16> %op) + ret i16 %res +} + +; No single instruction NEON ANDV support. Use SVE. +define i32 @andv_v2i32(<2 x i32> %a) #0 { +; CHECK-LABEL: andv_v2i32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl2 +; CHECK: andv [[REDUCE:s[0-9]+]], [[PG]], z0.s +; CHECK: fmov w0, [[REDUCE]] +; CHECK: ret + %res = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %a) + ret i32 %res +} + +; No single instruction NEON ANDV support. Use SVE. 
+define i32 @andv_v4i32(<4 x i32> %a) #0 { +; CHECK-LABEL: andv_v4i32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl4 +; CHECK: andv [[REDUCE:s[0-9]+]], [[PG]], z0.s +; CHECK: fmov w0, [[REDUCE]] +; CHECK: ret + %res = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %a) + ret i32 %res +} + +define i32 @andv_v8i32(<8 x i32>* %a) #0 { +; CHECK-LABEL: andv_v8i32: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_GE_256-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: andv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_256-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_256-NEXT: ret + %op = load <8 x i32>, <8 x i32>* %a + %res = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %op) + ret i32 %res +} + +define i32 @andv_v16i32(<16 x i32>* %a) #0 { +; CHECK-LABEL: andv_v16i32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: andv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: andv [[REDUCE:s[0-9]+]], [[PG]], [[AND]].s +; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret + %op = load <16 x i32>, <16 x i32>* %a + %res = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %op) + ret i32 %res +} + +define i32 @andv_v32i32(<32 x i32>* %a) #0 { +; CHECK-LABEL: andv_v32i32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: andv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret + %op = load <32 x i32>, <32 x i32>* %a + %res = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> %op) + ret i32 %res +} + +define i32 @andv_v64i32(<64 x i32>* %a) #0 { +; CHECK-LABEL: andv_v64i32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: andv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret + %op = load <64 x i32>, <64 x i32>* %a + %res = call i32 @llvm.experimental.vector.reduce.and.v64i32(<64 x i32> %op) + ret i32 %res +} + +; Nothing to do for single element vectors. 
+define i64 @andv_v1i64(<1 x i64> %a) #0 { +; CHECK-LABEL: andv_v1i64: +; CHECK: fmov x0, d0 +; CHECK: ret + %res = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %a) + ret i64 %res +} + +; Use SVE for 128-bit vectors +define i64 @andv_v2i64(<2 x i64> %a) #0 { +; CHECK-LABEL: andv_v2i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl2 +; CHECK: andv [[REDUCE:d[0-9]+]], [[PG]], z0.d +; CHECK: fmov x0, [[REDUCE]] +; CHECK: ret + %res = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %a) + ret i64 %res +} + +define i64 @andv_v4i64(<4 x i64>* %a) #0 { +; CHECK-LABEL: andv_v4i64: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_GE_256-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: andv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_256-NEXT: ret + %op = load <4 x i64>, <4 x i64>* %a + %res = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %op) + ret i64 %res +} + +define i64 @andv_v8i64(<8 x i64>* %a) #0 { +; CHECK-LABEL: andv_v8i64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: andv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: andv [[REDUCE:d[0-9]+]], [[PG]], [[AND]].d +; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret + %op = load <8 x i64>, <8 x i64>* %a + %res = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> %op) + ret i64 %res +} + +define i64 @andv_v16i64(<16 x i64>* %a) #0 { +; CHECK-LABEL: andv_v16i64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: andv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret + %op = load <16 x i64>, <16 x i64>* %a + %res = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> %op) + ret i64 %res +} + +define i64 @andv_v32i64(<32 x i64>* %a) #0 { +; CHECK-LABEL: andv_v32i64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: andv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret + %op = load <32 x i64>, <32 x i64>* %a + %res = call i64 @llvm.experimental.vector.reduce.and.v32i64(<32 x i64> %op) + ret i64 %res +} + +attributes #0 = { "target-features"="+sve" } + +declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v256i8(<256 x i8>) + +declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>) +declare i16 
@llvm.experimental.vector.reduce.and.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v128i16(<128 x i16>) + +declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v64i32(<64 x i32>) + +declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v32i64(<32 x i64>) From 1dce692de1896412693f25a3afb4818883a611e7 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 5 Oct 2020 12:34:39 -0400 Subject: [PATCH 039/321] Revert "[OpenMP] Add Error Handling for Conflicting Pointer Sizes for Target Offload" Reverting because detecting architecture size doesn't work on all platforms. This reverts commit eaf73293cb6b8d45dd85ffced57aea7ad4177754. --- .../clang/Basic/DiagnosticDriverKinds.td | 1 - clang/lib/Frontend/CompilerInvocation.cpp | 8 ------ .../distribute_parallel_for_if_codegen.cpp | 24 ++++++++--------- ...ibute_parallel_for_num_threads_codegen.cpp | 24 ++++++++--------- ...istribute_parallel_for_simd_if_codegen.cpp | 26 +++++++++---------- ..._parallel_for_simd_num_threads_codegen.cpp | 24 ++++++++--------- ...arallel_reduction_codegen_tbaa_PR46146.cpp | 4 +-- ...get_incompatible_architecture_messages.cpp | 14 ---------- ...ams_distribute_parallel_for_if_codegen.cpp | 24 ++++++++--------- ...istribute_parallel_for_simd_if_codegen.cpp | 26 +++++++++---------- ...ams_distribute_parallel_for_if_codegen.cpp | 24 ++++++++--------- ...istribute_parallel_for_simd_if_codegen.cpp | 26 +++++++++---------- llvm/utils/lit/lit/llvm/config.py | 2 -- 13 files changed, 101 insertions(+), 126 deletions(-) delete mode 100644 clang/test/OpenMP/target_incompatible_architecture_messages.cpp diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 29bc19e5a84e5..3bf1bb19b7ae3 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -253,7 +253,6 @@ def err_drv_optimization_remark_format : Error< "unknown remark serializer format: '%0'">; def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">; def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">; -def err_drv_incompatible_omp_arch : Error<"OpenMP target architecture '%0' pointer size is incompatible with host '%1'">; def err_drv_omp_host_ir_file_not_found : Error< "The provided host compiler IR file '%0' is required to generate code for OpenMP target regions but cannot be found.">; def err_drv_omp_host_target_not_supported : Error< diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index bbdf0e3be7ae0..b402f53cc765b 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3206,14 +3206,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, TT.getArch() == llvm::Triple::x86 || 
TT.getArch() == llvm::Triple::x86_64)) Diags.Report(diag::err_drv_invalid_omp_target) << A->getValue(i); - else if ((T.isArch64Bit() && TT.isArch32Bit()) || - (T.isArch64Bit() && TT.isArch16Bit()) || - (T.isArch32Bit() && TT.isArch64Bit()) || - (T.isArch32Bit() && TT.isArch16Bit()) || - (T.isArch16Bit() && TT.isArch32Bit()) || - (T.isArch16Bit() && TT.isArch64Bit())) - Diags.Report(diag::err_drv_incompatible_omp_arch) - << A->getValue(i) << T.str(); else Opts.OMPTargetTriples.push_back(TT); } diff --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp index 2b766f136d1d9..c62832a2705fa 100644 --- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp @@ -1,19 +1,19 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp 
-fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp index d72dab1832b89..ba8545a297c0e 100644 --- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp @@ -1,19 +1,19 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 
-fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp index 219bad5ad7941..1e6f0c67247f8 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ 
b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -1,16 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 - -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ 
-triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp index eeb7dd0caaf39..09bc36388d69d 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp @@ -1,19 +1,19 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s 
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp index 031c7b6c778e4..aefe00f1cadf9 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o 
%t-ppc-host.bc // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER diff --git a/clang/test/OpenMP/target_incompatible_architecture_messages.cpp b/clang/test/OpenMP/target_incompatible_architecture_messages.cpp deleted file mode 100644 index f0f9d236d764d..0000000000000 --- a/clang/test/OpenMP/target_incompatible_architecture_messages.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: not %clang_cc1 -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck %s -// RUN: not %clang_cc1 -x c++ -fopenmp -triple i386-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -o - %s 2>&1 | FileCheck %s -// RUN: not %clang_cc1 -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck %s -// RUN: not %clang_cc1 -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -o - %s 2>&1 | FileCheck %s -// CHECK: error: OpenMP target architecture '{{.+}}' pointer size is incompatible with host '{{.+}}' -#ifndef HEADER -#define HEADER - -void test() { -#pragma omp target - {} -} - 
-#endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp index 1f6c96a3fad5c..87538e4d50da2 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -1,19 +1,19 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY // SIMD-ONLY-NOT: {{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd 
-fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY // SIMD-ONLY-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp index 107400192a24e..50de0a5e1dff3 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -1,16 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 - -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple 
-emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY // SIMD-ONLY-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp index 3c2664b558fe8..55f76aa8aa423 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -1,19 +1,19 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s 
-emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd 
-fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp index 0afb9c9b7f1db..2f37f23775804 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -1,16 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 - -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=%omp_powerpc_triple -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK 
--check-prefix OMP45
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py
index 6fd7c4434e3ae..c8013945e3f93 100644
--- a/llvm/utils/lit/lit/llvm/config.py
+++ b/llvm/utils/lit/lit/llvm/config.py
@@ -456,8 +456,6 @@ def use_clang(self, additional_tool_dirs=[], additional_flags=[], required=True)
                 self.make_itanium_abi_triple(self.config.target_triple)))
             self.config.substitutions.append(('%ms_abi_triple',
                                               self.make_msabi_triple(self.config.target_triple)))
-            self.config.substitutions.append(('%omp_powerpc_triple',
-                                              'powerpc' + str(sys.hash_info.width) + 'le-ibm-linux-gnu'))
             self.config.substitutions.append(
                 ('%resource_dir', builtin_include_dir))

From 8d51d37e0628bde3eb5a3200507ba7135dfc2751 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski
Date: Mon, 5 Oct 2020 17:42:00 +0100
Subject: [PATCH 040/321] [flang] Introduce DiagnosticConsumer classes in
 libflangFrontend

Currently Flang uses the TextDiagnostic, TextDiagnosticPrinter &
TextDiagnosticBuffer classes from Clang (more specifically, from
libclangFrontend). This patch introduces simplified equivalents of these
classes in Flang (i.e. it removes the dependency on libclangFrontend).

Flang only needs these diagnostics classes for the compiler driver
diagnostics. This is unlike Clang, in which similar diagnostic classes are
also used for e.g. Lexing/Parsing/Sema diagnostics. For this reason, the
implementations introduced here are relatively basic. We can extend them
in the future if this is required.

This patch also enhances how the diagnostics are printed.
In particular, this is the diagnostic that you'd get _before_ the changes
introduced here (no text formatting):
```
$ bin/flang-new
error: no input files
```
This is the diagnostic that you get _after_ the changes introduced here
(in terminals that support it, the text is formatted - bold + red):
```
$ bin/flang-new
flang-new: error: no input files
```

Tests are updated accordingly and options related to enabling/disabling
color diagnostics are flagged as supported by Flang.

Reviewed By: sameeranjoshi, CarolineConcatto

Differential Revision: https://reviews.llvm.org/D87774
---
 clang/include/clang/Driver/Options.td         |  3 +-
 .../flang/Frontend/CompilerInvocation.h       |  9 ++
 flang/include/flang/Frontend/TextDiagnostic.h | 70 +++++++++++++
 .../flang/Frontend/TextDiagnosticBuffer.h     | 52 ++++++++++
 .../flang/Frontend/TextDiagnosticPrinter.h    | 55 +++++++++++
 flang/lib/Frontend/CMakeLists.txt             |  6 +-
 flang/lib/Frontend/CompilerInstance.cpp       |  4 +-
 flang/lib/Frontend/CompilerInvocation.cpp     | 43 ++++++++
 flang/lib/Frontend/TextDiagnostic.cpp         | 97 +++++++++++++++++++
 flang/lib/Frontend/TextDiagnosticBuffer.cpp   | 74 ++++++++++++++
 flang/lib/Frontend/TextDiagnosticPrinter.cpp  | 55 +++++++++++
 flang/test/Flang-Driver/driver-error-cc1.c    |  2 +-
 flang/test/Flang-Driver/driver-error-cc1.cpp  |  2 +-
 flang/test/Flang-Driver/driver-help.f90       | 24 +++--
 flang/test/Flang-Driver/driver-version.f90    |  2 +-
 flang/test/Flang-Driver/missing-input.f90     |  2 +-
 flang/tools/flang-driver/driver.cpp           | 24 ++++-
 flang/tools/flang-driver/fc1_main.cpp         |  8 +-
 .../Frontend/CompilerInstanceTest.cpp         |  4 +-
 19 files changed, 511 insertions(+), 25 deletions(-)
 create mode 100644 flang/include/flang/Frontend/TextDiagnostic.h
 create mode 100644 flang/include/flang/Frontend/TextDiagnosticBuffer.h
 create mode 100644 flang/include/flang/Frontend/TextDiagnosticPrinter.h
 create mode 100644 flang/lib/Frontend/TextDiagnostic.cpp
 create mode 100644 flang/lib/Frontend/TextDiagnosticBuffer.cpp
 create mode 100644 flang/lib/Frontend/TextDiagnosticPrinter.cpp

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 18a1234762536..e65a68c0deaae 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -876,7 +876,8 @@ def fclang_abi_compat_EQ : Joined<["-"], "fclang-abi-compat=">, Group<f_clang_Group>,
   MetaVarName<"<version>">, Values<"<major>.<minor>,latest">,
   HelpText<"Attempt to match the ABI of Clang <version>">;
 def fclasspath_EQ : Joined<["-"], "fclasspath=">, Group<f_Group>;
-defm color_diagnostics : OptInFFlag<"color-diagnostics", "Enable", "Disable", " colors in diagnostics", [CoreOption]>;
+defm color_diagnostics : OptInFFlag<"color-diagnostics", "Enable", "Disable", " colors in diagnostics",
+  [CoreOption, FlangOption]>;
 def fdiagnostics_color : Flag<["-"], "fdiagnostics-color">, Group<f_Group>,
   Flags<[CoreOption, DriverOption]>;
 def fdiagnostics_color_EQ : Joined<["-"], "fdiagnostics-color=">, Group<f_Group>;
diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h
index 0fa169fd16200..05f93293d0a5e 100644
--- a/flang/include/flang/Frontend/CompilerInvocation.h
+++ b/flang/include/flang/Frontend/CompilerInvocation.h
@@ -11,8 +11,17 @@
 #include "flang/Frontend/FrontendOptions.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/DiagnosticOptions.h"
+#include "llvm/Option/ArgList.h"
 
 namespace Fortran::frontend {
+
+/// Fill out Opts based on the options given in Args.
+///
+/// When errors are encountered, return false and, if Diags is non-null,
+/// report the error(s).
+bool ParseDiagnosticArgs(clang::DiagnosticOptions &opts,
+    llvm::opt::ArgList &args, bool defaultDiagColor = true);
+
 class CompilerInvocationBase {
 public:
   /// Options controlling the diagnostic engine.
diff --git a/flang/include/flang/Frontend/TextDiagnostic.h b/flang/include/flang/Frontend/TextDiagnostic.h
new file mode 100644
index 0000000000000..f803058c88c5e
--- /dev/null
+++ b/flang/include/flang/Frontend/TextDiagnostic.h
@@ -0,0 +1,70 @@
+//===--- TextDiagnostic.h - Text Diagnostic Pretty-Printing -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A utility class that provides support for textual pretty-printing of
+// diagnostics. Based on clang::TextDiagnostic (this is a trimmed version).
+//
+// TODO: If expanding, consider sharing the implementation with Clang.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FLANG_FRONTEND_TEXTDIAGNOSTIC_H
+#define LLVM_FLANG_FRONTEND_TEXTDIAGNOSTIC_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+
+namespace Fortran::frontend {
+
+/// Class to encapsulate the logic for formatting and printing a textual
+/// diagnostic message.
+///
+/// The purpose of this class is to isolate the implementation of printing
+/// beautiful text diagnostics from any particular interfaces. Currently only
+/// simple diagnostics that lack source location information are supported
+/// (e.g. Flang driver errors).
+///
+/// In the future we can extend this class (akin to Clang) to support more
+/// complex diagnostics that would include macro backtraces, caret diagnostics,
+/// FixIt Hints and code snippets.
+///
+class TextDiagnostic {
+public:
+  TextDiagnostic();
+
+  ~TextDiagnostic();
+
+  /// Print the diagnostic level to a llvm::raw_ostream.
+  ///
+  /// This is a static helper that handles colorizing the level and formatting
+  /// it into an arbitrary output stream.
+  ///
+  /// \param os Where the message is printed
+  /// \param level The diagnostic level (e.g. error or warning)
+  /// \param showColors Enable colorizing of the message.
+  static void PrintDiagnosticLevel(llvm::raw_ostream &os,
+      clang::DiagnosticsEngine::Level level, bool showColors);
+
+  /// Pretty-print a diagnostic message to a llvm::raw_ostream.
+  ///
+  /// This is a static helper to handle the colorizing and rendering of a
+  /// diagnostic message to a particular ostream. In the future we can
+  /// extend it to support e.g. line wrapping. It is publicly visible because
+  /// at this stage we don't require any state data to print a diagnostic.
+  ///
+  /// \param os Where the message is printed
+  /// \param isSupplemental true if this is a continuation note diagnostic
+  /// \param message The text actually printed
+  /// \param showColors Enable colorizing of the message.
+  static void PrintDiagnosticMessage(llvm::raw_ostream &os, bool isSupplemental,
+      llvm::StringRef message, bool showColors);
+};
+
+} // namespace Fortran::frontend
+
+#endif
diff --git a/flang/include/flang/Frontend/TextDiagnosticBuffer.h b/flang/include/flang/Frontend/TextDiagnosticBuffer.h
new file mode 100644
index 0000000000000..7322a5745c79e
--- /dev/null
+++ b/flang/include/flang/Frontend/TextDiagnosticBuffer.h
@@ -0,0 +1,52 @@
+//===- TextDiagnosticBuffer.h - Buffer Text Diagnostics ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a concrete diagnostic client. The diagnostics are buffered rather
+// than printed. In order to print them, use the FlushDiagnostics method.
+// Pretty-printing is not supported.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FLANG_FRONTEND_TEXTDIAGNOSTICBUFFER_H
+#define LLVM_FLANG_FRONTEND_TEXTDIAGNOSTICBUFFER_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include <cstddef>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace Fortran::frontend {
+
+class TextDiagnosticBuffer : public clang::DiagnosticConsumer {
+public:
+  using DiagList = std::vector<std::pair<clang::SourceLocation, std::string>>;
+  using DiagnosticsLevelAndIndexPairs =
+      std::vector<std::pair<clang::DiagnosticsEngine::Level, size_t>>;
+
+private:
+  DiagList errors_, warnings_, remarks_, notes_;
+
+  /// All diagnostics in the order in which they were generated. That order
+  /// likely doesn't correspond to user input order, but at least it keeps
+  /// notes in the right places. Each pair is a diagnostic level and an index
+  /// into the corresponding DiagList above.
+  DiagnosticsLevelAndIndexPairs all_;
+
+public:
+  void HandleDiagnostic(clang::DiagnosticsEngine::Level diagLevel,
+      const clang::Diagnostic &info) override;
+
+  /// Flush the buffered diagnostics to a given diagnostic engine.
+  void FlushDiagnostics(clang::DiagnosticsEngine &diags) const;
+};
+
+} // namespace Fortran::frontend
+
+#endif // LLVM_FLANG_FRONTEND_TEXTDIAGNOSTICBUFFER_H
diff --git a/flang/include/flang/Frontend/TextDiagnosticPrinter.h b/flang/include/flang/Frontend/TextDiagnosticPrinter.h
new file mode 100644
index 0000000000000..b67731d843522
--- /dev/null
+++ b/flang/include/flang/Frontend/TextDiagnosticPrinter.h
@@ -0,0 +1,55 @@
+//===--- TextDiagnosticPrinter.h - Text Diagnostic Client -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a concrete diagnostic client. In terminals that support it, the
+// diagnostics are pretty-printed (colors + bold). The printing/flushing
+// happens in HandleDiagnostic (usually called at the point when the
+// diagnostic is generated).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FLANG_FRONTEND_TEXTDIAGNOSTICPRINTER_H
+#define LLVM_FLANG_FRONTEND_TEXTDIAGNOSTICPRINTER_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+class DiagnosticOptions;
+class DiagnosticsEngine;
+} // namespace clang
+
+using llvm::IntrusiveRefCntPtr;
+using llvm::raw_ostream;
+
+namespace Fortran::frontend {
+class TextDiagnostic;
+
+class TextDiagnosticPrinter : public clang::DiagnosticConsumer {
+  raw_ostream &os_;
+  llvm::IntrusiveRefCntPtr<clang::DiagnosticOptions> diagOpts_;
+
+  /// A string to prefix to error messages.
+  std::string prefix_;
+
+public:
+  TextDiagnosticPrinter(raw_ostream &os, clang::DiagnosticOptions *diags);
+  ~TextDiagnosticPrinter() override;
+
+  /// Set the diagnostic printer prefix string, which will be printed at the
+  /// start of any diagnostics. If empty, no prefix string is used.
+  void set_prefix(std::string value) { prefix_ = std::move(value); }
+
+  void HandleDiagnostic(clang::DiagnosticsEngine::Level level,
+      const clang::Diagnostic &info) override;
+};
+
+} // namespace Fortran::frontend
+
+#endif
diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt
index fe74662c05ced..3cebc959c5eca 100644
--- a/flang/lib/Frontend/CMakeLists.txt
+++ b/flang/lib/Frontend/CMakeLists.txt
@@ -2,6 +2,9 @@ add_flang_library(flangFrontend
   CompilerInstance.cpp
   CompilerInvocation.cpp
   FrontendOptions.cpp
+  TextDiagnosticPrinter.cpp
+  TextDiagnosticBuffer.cpp
+  TextDiagnostic.cpp
 
   DEPENDS
   clangBasic
@@ -9,9 +12,6 @@ add_flang_library(flangFrontend
   LINK_LIBS
   clangBasic
   clangDriver
-  # TODO: Added to re-use clang's TextDiagnosticBuffer & TextDiagnosticPrinter.
-  # Add a custom implementation for Flang and remove this dependency.
- clangFrontend LINK_COMPONENTS Option diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp index bf1461dd16ad6..dd92639c3dc2e 100644 --- a/flang/lib/Frontend/CompilerInstance.cpp +++ b/flang/lib/Frontend/CompilerInstance.cpp @@ -8,7 +8,7 @@ #include "flang/Frontend/CompilerInstance.h" #include "flang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "flang/Frontend/TextDiagnosticPrinter.h" #include "llvm/Support/raw_ostream.h" using namespace Fortran::frontend; @@ -36,7 +36,7 @@ CompilerInstance::CreateDiagnostics(clang::DiagnosticOptions *opts, if (client) { diags->setClient(client, shouldOwnClient); } else { - diags->setClient(new clang::TextDiagnosticPrinter(llvm::errs(), opts)); + diags->setClient(new TextDiagnosticPrinter(llvm::errs(), opts)); } return diags; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index c68ad5c11d65a..aef5e3c95d054 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -17,6 +17,7 @@ #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/OptTable.h" +#include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" using namespace Fortran::frontend; @@ -35,6 +36,48 @@ CompilerInvocationBase::~CompilerInvocationBase() = default; //===----------------------------------------------------------------------===// // Deserialization (from args) //===----------------------------------------------------------------------===// +static bool parseShowColorsArgs( + const llvm::opt::ArgList &args, bool defaultColor) { + // Color diagnostics default to auto ("on" if terminal supports) in the driver + // but default to off in cc1, needing an explicit OPT_fdiagnostics_color. + // Support both clang's -f[no-]color-diagnostics and gcc's + // -f[no-]diagnostics-colors[=never|always|auto]. + enum { + Colors_On, + Colors_Off, + Colors_Auto + } ShowColors = defaultColor ? Colors_Auto : Colors_Off; + + for (auto *a : args) { + const llvm::opt::Option &O = a->getOption(); + if (O.matches(clang::driver::options::OPT_fcolor_diagnostics) || + O.matches(clang::driver::options::OPT_fdiagnostics_color)) { + ShowColors = Colors_On; + } else if (O.matches(clang::driver::options::OPT_fno_color_diagnostics) || + O.matches(clang::driver::options::OPT_fno_diagnostics_color)) { + ShowColors = Colors_Off; + } else if (O.matches(clang::driver::options::OPT_fdiagnostics_color_EQ)) { + llvm::StringRef value(a->getValue()); + if (value == "always") + ShowColors = Colors_On; + else if (value == "never") + ShowColors = Colors_Off; + else if (value == "auto") + ShowColors = Colors_Auto; + } + } + + return ShowColors == Colors_On || + (ShowColors == Colors_Auto && llvm::sys::Process::StandardErrHasColors()); +} + +bool Fortran::frontend::ParseDiagnosticArgs(clang::DiagnosticOptions &opts, + llvm::opt::ArgList &args, bool defaultDiagColor) { + opts.ShowColors = parseShowColorsArgs(args, defaultDiagColor); + + return true; +} + static InputKind ParseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, clang::DiagnosticsEngine &diags) { // Identify the action (i.e. 
opts.ProgramAction) diff --git a/flang/lib/Frontend/TextDiagnostic.cpp b/flang/lib/Frontend/TextDiagnostic.cpp new file mode 100644 index 0000000000000..0a908537209e6 --- /dev/null +++ b/flang/lib/Frontend/TextDiagnostic.cpp @@ -0,0 +1,97 @@ +//===--- TextDiagnostic.cpp - Text Diagnostic Pretty-Printing -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Frontend/TextDiagnostic.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "llvm/Support/raw_ostream.h" + +using namespace Fortran::frontend; + +// TODO: Similar enums are defined in clang/lib/Frontend/TextDiagnostic.cpp. +// It would be best to share them +static const enum llvm::raw_ostream::Colors noteColor = + llvm::raw_ostream::BLACK; +static const enum llvm::raw_ostream::Colors remarkColor = + llvm::raw_ostream::BLUE; +static const enum llvm::raw_ostream::Colors warningColor = + llvm::raw_ostream::MAGENTA; +static const enum llvm::raw_ostream::Colors errorColor = llvm::raw_ostream::RED; +static const enum llvm::raw_ostream::Colors fatalColor = llvm::raw_ostream::RED; +// Used for changing only the bold attribute. +static const enum llvm::raw_ostream::Colors savedColor = + llvm::raw_ostream::SAVEDCOLOR; + +TextDiagnostic::TextDiagnostic() {} + +TextDiagnostic::~TextDiagnostic() {} + +/*static*/ void TextDiagnostic::PrintDiagnosticLevel(llvm::raw_ostream &os, + clang::DiagnosticsEngine::Level level, bool showColors) { + if (showColors) { + // Print diagnostic category in bold and color + switch (level) { + case clang::DiagnosticsEngine::Ignored: + llvm_unreachable("Invalid diagnostic type"); + case clang::DiagnosticsEngine::Note: + os.changeColor(noteColor, true); + break; + case clang::DiagnosticsEngine::Remark: + os.changeColor(remarkColor, true); + break; + case clang::DiagnosticsEngine::Warning: + os.changeColor(warningColor, true); + break; + case clang::DiagnosticsEngine::Error: + os.changeColor(errorColor, true); + break; + case clang::DiagnosticsEngine::Fatal: + os.changeColor(fatalColor, true); + break; + } + } + + switch (level) { + case clang::DiagnosticsEngine::Ignored: + llvm_unreachable("Invalid diagnostic type"); + case clang::DiagnosticsEngine::Note: + os << "note"; + break; + case clang::DiagnosticsEngine::Remark: + os << "remark"; + break; + case clang::DiagnosticsEngine::Warning: + os << "warning"; + break; + case clang::DiagnosticsEngine::Error: + os << "error"; + break; + case clang::DiagnosticsEngine::Fatal: + os << "fatal error"; + break; + } + + os << ": "; + + if (showColors) + os.resetColor(); +} + +/*static*/ +void TextDiagnostic::PrintDiagnosticMessage(llvm::raw_ostream &os, + bool isSupplemental, llvm::StringRef message, bool showColors) { + if (showColors && !isSupplemental) { + // Print primary diagnostic messages in bold and without color. 
+ os.changeColor(savedColor, true); + } + + os << message; + + if (showColors) + os.resetColor(); + os << '\n'; +} diff --git a/flang/lib/Frontend/TextDiagnosticBuffer.cpp b/flang/lib/Frontend/TextDiagnosticBuffer.cpp new file mode 100644 index 0000000000000..6ac0f18b5ee39 --- /dev/null +++ b/flang/lib/Frontend/TextDiagnosticBuffer.cpp @@ -0,0 +1,74 @@ +//===- TextDiagnosticBuffer.cpp - Buffer Text Diagnostics -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a concrete diagnostic client, which buffers the diagnostic messages. +// +//===----------------------------------------------------------------------===// + +#include "flang/Frontend/TextDiagnosticBuffer.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace Fortran::frontend; + +/// HandleDiagnostic - Store the errors, warnings, and notes that are +/// reported. +void TextDiagnosticBuffer::HandleDiagnostic( + clang::DiagnosticsEngine::Level level, const clang::Diagnostic &info) { + // Default implementation (warnings_/errors count). + DiagnosticConsumer::HandleDiagnostic(level, info); + + llvm::SmallString<100> buf; + info.FormatDiagnostic(buf); + switch (level) { + default: + llvm_unreachable("Diagnostic not handled during diagnostic buffering!"); + case clang::DiagnosticsEngine::Note: + all_.emplace_back(level, notes_.size()); + notes_.emplace_back(info.getLocation(), std::string(buf.str())); + break; + case clang::DiagnosticsEngine::Warning: + all_.emplace_back(level, warnings_.size()); + warnings_.emplace_back(info.getLocation(), std::string(buf.str())); + break; + case clang::DiagnosticsEngine::Remark: + all_.emplace_back(level, remarks_.size()); + remarks_.emplace_back(info.getLocation(), std::string(buf.str())); + break; + case clang::DiagnosticsEngine::Error: + case clang::DiagnosticsEngine::Fatal: + all_.emplace_back(level, errors_.size()); + errors_.emplace_back(info.getLocation(), std::string(buf.str())); + break; + } +} + +void TextDiagnosticBuffer::FlushDiagnostics( + clang::DiagnosticsEngine &Diags) const { + for (const auto &i : all_) { + auto Diag = Diags.Report(Diags.getCustomDiagID(i.first, "%0")); + switch (i.first) { + default: + llvm_unreachable("Diagnostic not handled during diagnostic flushing!"); + case clang::DiagnosticsEngine::Note: + Diag << notes_[i.second].second; + break; + case clang::DiagnosticsEngine::Warning: + Diag << warnings_[i.second].second; + break; + case clang::DiagnosticsEngine::Remark: + Diag << remarks_[i.second].second; + break; + case clang::DiagnosticsEngine::Error: + case clang::DiagnosticsEngine::Fatal: + Diag << errors_[i.second].second; + break; + } + } +} diff --git a/flang/lib/Frontend/TextDiagnosticPrinter.cpp b/flang/lib/Frontend/TextDiagnosticPrinter.cpp new file mode 100644 index 0000000000000..20cbe75409747 --- /dev/null +++ b/flang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -0,0 +1,55 @@ +//===--- TextDiagnosticPrinter.cpp - Diagnostic Printer -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This diagnostic client prints out the diagnostic messages.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Frontend/TextDiagnosticPrinter.h"
+#include "flang/Frontend/TextDiagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace Fortran::frontend;
+
+TextDiagnosticPrinter::TextDiagnosticPrinter(
+    raw_ostream &os, clang::DiagnosticOptions *diags)
+    : os_(os), diagOpts_(diags) {}
+
+TextDiagnosticPrinter::~TextDiagnosticPrinter() {}
+
+void TextDiagnosticPrinter::HandleDiagnostic(
+    clang::DiagnosticsEngine::Level level, const clang::Diagnostic &info) {
+  // Default implementation (warnings/errors count).
+  DiagnosticConsumer::HandleDiagnostic(level, info);
+
+  // Render the diagnostic message into a temporary buffer eagerly. We'll use
+  // this later as we print out the diagnostic to the terminal.
+  llvm::SmallString<100> outStr;
+  info.FormatDiagnostic(outStr);
+
+  llvm::raw_svector_ostream DiagMessageStream(outStr);
+
+  if (!prefix_.empty())
+    os_ << prefix_ << ": ";
+
+  // We only emit diagnostics in contexts that lack valid source locations.
+  assert(!info.getLocation().isValid() &&
+      "Diagnostics with valid source location are not supported");
+
+  Fortran::frontend::TextDiagnostic::PrintDiagnosticLevel(
+      os_, level, diagOpts_->ShowColors);
+  Fortran::frontend::TextDiagnostic::PrintDiagnosticMessage(os_,
+      /*isSupplemental=*/level == clang::DiagnosticsEngine::Note,
+      DiagMessageStream.str(), diagOpts_->ShowColors);
+
+  os_.flush();
+  return;
+}
diff --git a/flang/test/Flang-Driver/driver-error-cc1.c b/flang/test/Flang-Driver/driver-error-cc1.c
index 1563ee431579f..aed0f74ecd0bf 100644
--- a/flang/test/Flang-Driver/driver-error-cc1.c
+++ b/flang/test/Flang-Driver/driver-error-cc1.c
@@ -4,4 +4,4 @@
 
 // C files are currently not supported (i.e. `flang -cc1`)
 
-// CHECK:error: unknown integrated tool '-cc1'. Valid tools include '-fc1'.
+// CHECK: error: unknown integrated tool '-cc1'. Valid tools include '-fc1'.
diff --git a/flang/test/Flang-Driver/driver-error-cc1.cpp b/flang/test/Flang-Driver/driver-error-cc1.cpp
index 20e469733bc9a..0de39dc90ea21 100644
--- a/flang/test/Flang-Driver/driver-error-cc1.cpp
+++ b/flang/test/Flang-Driver/driver-error-cc1.cpp
@@ -4,4 +4,4 @@
 
 // C++ files are currently not supported (i.e. `flang -cc1`)
 
-// CHECK:error: unknown integrated tool '-cc1'. Valid tools include '-fc1'.
+// CHECK: error: unknown integrated tool '-cc1'. Valid tools include '-fc1'.
diff --git a/flang/test/Flang-Driver/driver-help.f90 b/flang/test/Flang-Driver/driver-help.f90
index 6ecd076efee4e..aafc5630b2704 100644
--- a/flang/test/Flang-Driver/driver-help.f90
+++ b/flang/test/Flang-Driver/driver-help.f90
@@ -1,13 +1,21 @@
-! RUN: %flang-new -help 2>&1 | FileCheck %s
-! RUN: %flang-new -fc1 -help 2>&1 | FileCheck %s
+! RUN: %flang-new -help 2>&1 | FileCheck %s --check-prefix=HELP
+! RUN: %flang-new -fc1 -help 2>&1 | FileCheck %s --check-prefix=HELP-FC1
 ! RUN: not %flang-new -helps 2>&1 | FileCheck %s --check-prefix=ERROR
 
 ! REQUIRES: new-flang-driver
 
-! CHECK:USAGE: flang-new
-! CHECK-EMPTY:
-! CHECK-NEXT:OPTIONS:
-! CHECK-NEXT: -help Display available options
-! CHECK-NEXT: --version Print version information
+! HELP:USAGE: flang-new
+! HELP-EMPTY:
+! HELP-NEXT:OPTIONS:
+! HELP-NEXT: -fcolor-diagnostics Enable colors in diagnostics
+! HELP-NEXT: -fno-color-diagnostics Disable colors in diagnostics
+! HELP-NEXT: -help Display available options
+! HELP-NEXT: --version Print version information
 
-! ERROR: error: unknown argument '-helps'; did you mean '-help'
+! HELP-FC1:USAGE: flang-new
+! HELP-FC1-EMPTY:
+! HELP-FC1-NEXT:OPTIONS:
+! HELP-FC1-NEXT: -help Display available options
+! HELP-FC1-NEXT: --version Print version information
+
+! ERROR: flang-new: error: unknown argument '-helps'; did you mean '-help'
diff --git a/flang/test/Flang-Driver/driver-version.f90 b/flang/test/Flang-Driver/driver-version.f90
index 8552d0b2f28b4..199770bd9e508 100644
--- a/flang/test/Flang-Driver/driver-version.f90
+++ b/flang/test/Flang-Driver/driver-version.f90
@@ -8,4 +8,4 @@
 ! CHECK-NEXT:Thread model:
 ! CHECK-NEXT:InstalledDir:
 
-! ERROR: error: unsupported option '--versions'; did you mean '--version'?
+! ERROR: flang-new: error: unsupported option '--versions'; did you mean '--version'?
diff --git a/flang/test/Flang-Driver/missing-input.f90 b/flang/test/Flang-Driver/missing-input.f90
index 96818bc4bd385..5e46395e8de64 100644
--- a/flang/test/Flang-Driver/missing-input.f90
+++ b/flang/test/Flang-Driver/missing-input.f90
@@ -2,4 +2,4 @@
 
 ! REQUIRES: new-flang-driver
 
-! CHECK: error: no input files
+! CHECK: flang-new: error: no input files
diff --git a/flang/tools/flang-driver/driver.cpp b/flang/tools/flang-driver/driver.cpp
index 9d04994d98435..e00320096f380 100644
--- a/flang/tools/flang-driver/driver.cpp
+++ b/flang/tools/flang-driver/driver.cpp
@@ -11,11 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 #include "clang/Driver/Driver.h"
+#include "flang/Frontend/CompilerInvocation.h"
+#include "flang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/DiagnosticIDs.h"
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Driver/Compilation.h"
-#include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/Option/ArgList.h"
@@ -23,6 +24,8 @@
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/VirtualFileSystem.h"
 
+using llvm::StringRef;
+
 // main frontend method. Lives inside fc1_main.cpp
 extern int fc1_main(llvm::ArrayRef<const char *> argv, const char *argv0);
 
@@ -37,6 +40,17 @@ std::string GetExecutablePath(const char *argv0) {
 static clang::DiagnosticOptions *CreateAndPopulateDiagOpts(
     llvm::ArrayRef<const char *> argv) {
   auto *diagOpts = new clang::DiagnosticOptions;
+
+  // Ignore missingArgCount and the return value of ParseDiagnosticArgs.
+  // Any errors that would be diagnosed here will also be diagnosed later,
+  // when the DiagnosticsEngine actually exists.
+  unsigned missingArgIndex, missingArgCount;
+  llvm::opt::InputArgList args = clang::driver::getDriverOptTable().ParseArgs(
+      argv.slice(1), missingArgIndex, missingArgCount,
+      /*FlagsToInclude=*/clang::driver::options::FlangOption);
+
+  (void)Fortran::frontend::ParseDiagnosticArgs(*diagOpts, args);
+
   return diagOpts;
 }
 
@@ -83,8 +97,12 @@ int main(int argc_, const char **argv_) {
       CreateAndPopulateDiagOpts(argv);
   llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagID(
       new clang::DiagnosticIDs());
-  clang::TextDiagnosticPrinter *diagClient =
-      new clang::TextDiagnosticPrinter(llvm::errs(), &*diagOpts);
+  Fortran::frontend::TextDiagnosticPrinter *diagClient =
+      new Fortran::frontend::TextDiagnosticPrinter(llvm::errs(), &*diagOpts);
+
+  diagClient->set_prefix(
+      std::string(llvm::sys::path::stem(GetExecutablePath(argv[0]))));
+
   clang::DiagnosticsEngine diags(diagID, &*diagOpts, diagClient);
 
   // Prepare the driver
diff --git a/flang/tools/flang-driver/fc1_main.cpp b/flang/tools/flang-driver/fc1_main.cpp
index bb69517edde28..5f7eeb1ea5013 100644
--- a/flang/tools/flang-driver/fc1_main.cpp
+++ b/flang/tools/flang-driver/fc1_main.cpp
@@ -14,9 +14,9 @@
 
 #include "flang/Frontend/CompilerInstance.h"
 #include "flang/Frontend/CompilerInvocation.h"
+#include "flang/Frontend/TextDiagnosticBuffer.h"
 #include "flang/FrontendTool/Utils.h"
 #include "clang/Driver/DriverDiagnostic.h"
-#include "clang/Frontend/TextDiagnosticBuffer.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/OptTable.h"
@@ -34,18 +34,22 @@ int fc1_main(llvm::ArrayRef<const char *> argv, const char *argv0) {
   if (!flang->HasDiagnostics())
     return 1;
 
+  // We will buffer diagnostics from argument parsing so that we can output
+  // them using a well-formed diagnostic object.
+  TextDiagnosticBuffer *diagsBuffer = new TextDiagnosticBuffer;
+
   // Create CompilerInvocation - use a dedicated instance of DiagnosticsEngine
   // for parsing the arguments
   llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagID(
       new clang::DiagnosticIDs());
   llvm::IntrusiveRefCntPtr<clang::DiagnosticOptions> diagOpts =
      new clang::DiagnosticOptions();
-  clang::TextDiagnosticBuffer *diagsBuffer = new clang::TextDiagnosticBuffer;
   clang::DiagnosticsEngine diags(diagID, &*diagOpts, diagsBuffer);
 
   bool success =
       CompilerInvocation::CreateFromArgs(flang->GetInvocation(), argv, diags);
 
   diagsBuffer->FlushDiagnostics(flang->getDiagnostics());
+
   if (!success)
     return 1;
diff --git a/flang/unittests/Frontend/CompilerInstanceTest.cpp b/flang/unittests/Frontend/CompilerInstanceTest.cpp
index 866d097dc48b1..04d581cd35cbd 100644
--- a/flang/unittests/Frontend/CompilerInstanceTest.cpp
+++ b/flang/unittests/Frontend/CompilerInstanceTest.cpp
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Frontend/CompilerInstance.h"
+#include "flang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Basic/DiagnosticOptions.h"
-#include "clang/Frontend/TextDiagnosticPrinter.h"
 
 #include "gtest/gtest.h"
 
@@ -21,7 +21,7 @@ TEST(CompilerInstance, AllowDiagnosticLogWithUnownedDiagnosticConsumer) {
   // 1. Set-up a basic DiagnosticConsumer
   std::string diagnosticOutput;
   llvm::raw_string_ostream diagnosticsOS(diagnosticOutput);
-  auto diagPrinter = std::make_unique<clang::TextDiagnosticPrinter>(
+  auto diagPrinter = std::make_unique<TextDiagnosticPrinter>(
       diagnosticsOS, new clang::DiagnosticOptions());
 
   // 2. Create a CompilerInstance (to manage a DiagnosticEngine)

From afd729edee4ca0dc63771448926ab5d346161511 Mon Sep 17 00:00:00 2001
From: Mehdi Amini
Date: Mon, 5 Oct 2020 16:55:59 +0000
Subject: [PATCH 041/321] Add definition for static constexpr member (NFC)

Fix the build for some toolchains and configs.

---
 mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index 75d07f35d226f..93bdd1d89d931 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -2030,6 +2030,9 @@ struct AlignedAllocOpLowering : public AllocLikeOpLowering {
   static constexpr uint64_t kMinAlignedAllocAlignment = 16UL;
 };
 
+// Out-of-line definition, required until C++17.
+constexpr uint64_t AlignedAllocOpLowering::kMinAlignedAllocAlignment;
+
 struct AllocaOpLowering : public AllocLikeOpLowering {
   AllocaOpLowering(LLVMTypeConverter &converter)
       : AllocLikeOpLowering(AllocaOp::getOperationName(), converter) {}

From 6bcaf6ff69e978cb7ecf10a16f9ba75e14ba19c2 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Mon, 5 Oct 2020 18:58:29 +0200
Subject: [PATCH 042/321] [mlir][Linalg] Remove unused variable. NFCI.

---
 .../mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td
index 44c6b77ee4046..c10a1e4f4e046 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td
@@ -413,7 +413,6 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
       /*args=*/(ins),
       /*methodBody=*/"",
       /*defaultImplementation=*/[{
-        auto range = this->getOperation()->getOperands();
         return getNumInputsAndOutputBuffers() + $_op.getNumInitTensors();
       }]
     >,

From 9d630297700b94cc9c0118c65115c496c782302b Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Mon, 5 Oct 2020 18:58:54 +0200
Subject: [PATCH 043/321] Revert "[DebugInfo] Improve dbg preservation in LSR."

This reverts commit a3caf7f6102dc863425f9714b099af58397f0cd2.

The ReleaseLTO-g test-suite configuration has been failing to build
since this commit, because clang segfaults while building 7zip.

---
 llvm/include/llvm/Analysis/ScalarEvolution.h  | 18 ++---
 .../Transforms/Scalar/LoopStrengthReduce.cpp  | 57 --------------
 llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll   |  2 +-
 .../LoopStrengthReduce/dbg-preserve-0.ll      | 74 -------------------
 4 files changed, 10 insertions(+), 141 deletions(-)
 delete mode 100644 llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index ac6090a30d2ff..158257a5aa9a1 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1117,15 +1117,6 @@ class ScalarEvolution {
       const SCEV *S, const Loop *L,
       SmallPtrSetImpl<const SCEVPredicate *> &Preds);
 
-  /// Compute \p LHS - \p RHS and return the result as an APInt if it is a
-  /// constant, and None if it isn't.
-  ///
-  /// This is intended to be a cheaper version of getMinusSCEV. We can be
-  /// frugal here since we just bail out of actually constructing and
-  /// canonicalizing an expression in the cases where the result isn't going
-  /// to be a constant.
-  Optional<APInt> computeConstantDifference(const SCEV *LHS, const SCEV *RHS);
-
 private:
   /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
   /// Value is deleted.
@@ -1808,6 +1799,15 @@ class ScalarEvolution {
   bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R,
                       SCEV::NoWrapFlags &Flags);
 
+  /// Compute \p LHS - \p RHS and return the result as an APInt if it is a
+  /// constant, and None if it isn't.
+  ///
+  /// This is intended to be a cheaper version of getMinusSCEV. We can be
+  /// frugal here since we just bail out of actually constructing and
+  /// canonicalizing an expression in the cases where the result isn't going
+  /// to be a constant.
+  Optional<APInt> computeConstantDifference(const SCEV *LHS, const SCEV *RHS);
+
   /// Drop memoized information computed for S.
   void forgetMemoizedResults(const SCEV *S);
 
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fdb41f3e4b14b..537838e2bdc19 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -59,7 +59,6 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -81,7 +80,6 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GlobalValue.h"
@@ -5778,27 +5776,6 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
   if (MSSA)
     MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
 
-  // Debug preservation - record all llvm.dbg.value from the loop as well as
-  // the SCEV of their variable location. Since salvageDebugInfo may change the
-  // DIExpression we need to store the original here as well (i.e. it needs to
-  // be in sync with the SCEV).
-  SmallVector<
-      std::tuple<DbgValueInst *, Type *, const SCEV *, DIExpression *>,
-      32>
-      DbgValues;
-  for (auto &B : L->getBlocks()) {
-    for (auto &I : *B) {
-      if (DbgValueInst *D = dyn_cast<DbgValueInst>(&I)) {
-        auto V = D->getVariableLocation();
-        if (!SE.isSCEVable(V->getType()))
-          continue;
-        auto DS = SE.getSCEV(V);
-        DbgValues.push_back(
-            std::make_tuple(D, V->getType(), DS, D->getExpression()));
-      }
-    }
-  }
-
   // Run the main LSR transformation.
   Changed |=
       LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged();
@@ -5820,40 +5797,6 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
       DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
     }
   }
-  // Debug preservation - go through all recorded llvm.dbg.value and for those
-  // that now have an undef variable location use the recorded SCEV to try and
-  // update it. Compare with SCEV of Phi-nodes of loop header to find a
-  // suitable update candidate. SCEV match with constant offset is allowed and
-  // will be compensated for in the DIExpression.
-  if (Changed) {
-    for (auto &D : DbgValues) {
-      auto DbgValue = std::get<DbgValueInst *>(D);
-      auto DbgValueType = std::get<Type *>(D);
-      auto DbgValueSCEV = std::get<const SCEV *>(D);
-      auto DbgDIExpr = std::get<DIExpression *>(D);
-      if (!isa<UndefValue>(DbgValue->getVariableLocation()))
-        continue;
-      for (PHINode &Phi : L->getHeader()->phis()) {
-        if (DbgValueType != Phi.getType())
-          continue;
-        if (!SE.isSCEVable(Phi.getType()))
-          continue;
-        auto PhiSCEV = SE.getSCEV(&Phi);
-        if (Optional<APInt> Offset =
-                SE.computeConstantDifference(DbgValueSCEV, PhiSCEV)) {
-          auto &Ctx = DbgValue->getContext();
-          DbgValue->setOperand(
-              0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(&Phi)));
-          if (Offset.getValue().getSExtValue()) {
-            SmallVector<uint64_t, 3> Ops;
-            DIExpression::appendOffset(Ops, Offset.getValue().getSExtValue());
-            DbgDIExpr = DIExpression::prependOpcodes(DbgDIExpr, Ops, true);
-          }
-          DbgValue->setOperand(2, MetadataAsValue::get(Ctx, DbgDIExpr));
-        }
-      }
-    }
-  }
   return Changed;
 }
 
diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
index e8f37a370666c..08aecdac5b794 100644
--- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
@@ -33,7 +33,7 @@
 ; ASM: popl %ebx
 ; ASM: [[EPILOGUE]]: # %return
 ; ASM: retl $8
-; ASM: Ltmp11:
+; ASM: Ltmp10:
 ; ASM: .cv_fpo_endproc
 
 ; Note how RvaStart advances 7 bytes to skip the shrink-wrapped portion.
diff --git a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll
deleted file mode 100644
index 71031aabb95b7..0000000000000
--- a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: opt < %s -loop-reduce -S | FileCheck %s
-
-; Test that LSR preserves debug-info for induction variables.
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-
-define dso_local void @foo(i8* nocapture %p) local_unnamed_addr !dbg !7 {
-; CHECK-LABEL: @foo(
-entry:
-  call void @llvm.dbg.value(metadata i8* %p, metadata !13, metadata !DIExpression()), !dbg !16
-  call void @llvm.dbg.value(metadata i8 0, metadata !14, metadata !DIExpression()), !dbg !17
-  br label %for.body, !dbg !18
-
-for.cond.cleanup: ; preds = %for.body
-  ret void, !dbg !19
-
-for.body: ; preds = %entry, %for.body
-; CHECK-LABEL: for.body:
-  %i.06 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
-  %p.addr.05 = phi i8* [ %p, %entry ], [ %add.ptr, %for.body ]
-  call void @llvm.dbg.value(metadata i8 %i.06, metadata !14, metadata !DIExpression()), !dbg !17
-  call void @llvm.dbg.value(metadata i8* %p.addr.05, metadata !13, metadata !DIExpression()), !dbg !16
-; CHECK-NOT: call void @llvm.dbg.value(metadata i8* undef
-; CHECK: call void @llvm.dbg.value(metadata i8* %lsr.iv, metadata ![[MID_p:[0-9]+]], metadata !DIExpression(DW_OP_constu, 3, DW_OP_minus, DW_OP_stack_value)), !dbg !16
-  %add.ptr = getelementptr inbounds i8, i8* %p.addr.05, i64 3, !dbg !20
-  call void @llvm.dbg.value(metadata i8* %add.ptr, metadata !13, metadata !DIExpression()), !dbg !16
-; CHECK-NOT: call void @llvm.dbg.value(metadata i8* undef
-; CHECK: call void @llvm.dbg.value(metadata i8* %lsr.iv, metadata ![[MID_p]], metadata !DIExpression()), !dbg !16
-  store i8 %i.06, i8* %add.ptr, align 1, !dbg !23, !tbaa !24
-  %inc = add nuw nsw i8 %i.06, 1, !dbg !27
-  call void @llvm.dbg.value(metadata i8 %inc, metadata !14, metadata !DIExpression()), !dbg !17
-  %exitcond.not = icmp eq i8 %inc, 32, !dbg !28
-  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !18, !llvm.loop !29
-}
-
-declare void @llvm.dbg.value(metadata, metadata, metadata)
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4, !5}
-!llvm.ident = !{!6}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
-!1 = !DIFile(filename: "lsrdbg.c", directory: "/")
-!2 = !{}
-!3 = !{i32 7, !"Dwarf Version", i32 4}
-!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = !{i32 1, !"wchar_size", i32 4}
-!6 = !{!"clang version 12.0.0"}
-!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12)
-!8 = !DISubroutineType(types: !9)
-!9 = !{null, !10}
-!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
-!11 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char)
-!12 = !{!13, !14}
-!13 = !DILocalVariable(name: "p", arg: 1, scope: !7, file: !1, line: 2, type: !10)
-; CHECK: ![[MID_p]] = !DILocalVariable(name: "p", arg: 1, scope: !7, file: !1, line: 2, type: !10)
-!14 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 4, type: !11)
-!15 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 3)
-!16 = !DILocation(line: 0, scope: !7)
-!17 = !DILocation(line: 0, scope: !15)
-!18 = !DILocation(line: 4, column: 3, scope: !15)
-!19 = !DILocation(line: 8, column: 1, scope: !7)
-!20 = !DILocation(line: 5, column: 7, scope: !21)
-!21 = distinct !DILexicalBlock(scope: !22, file: !1, line: 4, column: 42)
-!22 = distinct !DILexicalBlock(scope: !15, file: !1, line: 4, column: 3)
-!23 = !DILocation(line: 6, column: 8, scope: !21)
-!24 = !{!25, !25, i64 0}
-!25 = !{!"omnipotent char", !26, i64 0}
-!26 = !{!"Simple C/C++ TBAA"}
-!27 = !DILocation(line: 4, column: 38, scope: !22)
-!28 = !DILocation(line: 4, column: 31, scope: !22)
-!29 = distinct !{!29, !18, !30, !31}
-!30 = !DILocation(line: 7, column: 3, scope: !15)
-!31 = !{!"llvm.loop.unroll.disable"}

From 4ce61144cb49b48817f0215820173005c8d7d816 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 5 Oct 2020 17:51:13 +0100
Subject: [PATCH 044/321] [InstCombine] canEvaluateShifted - remove dead (and never used) code. NFC.

This was already #if'd out when it was added back in 2010 at
rG18d7fc8fc6767 and has never been touched since.

---
 .../InstCombine/InstCombineShifts.cpp | 25 -------------------
 1 file changed, 25 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 983b45cbb11f5..6a4ecd21ea47f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -481,31 +481,6 @@ static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
 
-  // If this is the opposite shift, we can directly reuse the input of the shift
-  // if the needed bits are already zero in the input. This allows us to reuse
-  // the value which means that we don't care if the shift has multiple uses.
-  // TODO: Handle opposite shift by exact value.
-  ConstantInt *CI = nullptr;
-  if ((IsLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
-      (!IsLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
-    if (CI->getValue() == NumBits) {
-      // TODO: Check that the input bits are already zero with MaskedValueIsZero
-#if 0
-      // If this is a truncate of a logical shr, we can truncate it to a smaller
-      // lshr iff we know that the bits we would otherwise be shifting in are
-      // already zeros.
-      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
-      uint32_t BitWidth = Ty->getScalarSizeInBits();
-      if (MaskedValueIsZero(I->getOperand(0),
-            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
-          CI->getLimitedValue(BitWidth) < BitWidth) {
-        return CanEvaluateTruncated(I->getOperand(0), Ty);
-      }
-#endif
-
-    }
-  }
-
   // We can't mutate something that has multiple uses: doing so would
   // require duplicating the instruction in general, which isn't profitable.
   if (!I->hasOneUse()) return false;

From b326d4ff946d2061a566a3fcce9f33b484759fe0 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan
Date: Tue, 6 Oct 2020 00:45:24 +0800
Subject: [PATCH 045/321] [SelectionDAG] Don't remove unused negated constant immediately

This partially reverts a2fb5446 (actually, 2508ef01), which removed a
negated FP constant immediately if it had no uses. However, as discussed
in bug 47517, there are cases where NegX is folded into a constant from
other places while NegY is removed by that line of code, and NegX is
equal to NegY. In these cases, NegX is deleted before it is used and a
crash happens. So revert that code and add the necessary test case.

---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp |  4 +---
 llvm/test/CodeGen/X86/pr47517.ll                 | 13 +++++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cd49d5bfd98b9..80d92dbe886d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5773,10 +5773,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
     // If we already have the use of the negated floating constant, it is free
    // to negate it even if it has multiple uses.
-    if (!Op.hasOneUse() && CFP.use_empty()) {
-      RemoveDeadNode(CFP);
+    if (!Op.hasOneUse() && CFP.use_empty())
       break;
-    }
     Cost = NegatibleCost::Neutral;
     return CFP;
   }
diff --git a/llvm/test/CodeGen/X86/pr47517.ll b/llvm/test/CodeGen/X86/pr47517.ll
index 5672fbc69a41d..afc27b49ab2a4 100644
--- a/llvm/test/CodeGen/X86/pr47517.ll
+++ b/llvm/test/CodeGen/X86/pr47517.ll
@@ -26,3 +26,16 @@ entry:
   %fmul6 = fmul fast float %fmul3, %fadd4
   ret float %fmul6
 }
+
+; Ensure the negated result will not be removed when NegX == NegY and
+; NegX is needed
+define float @test2(float %x, float %y) {
+  %add = fadd fast float %x, 750.0
+  %sub = fsub fast float %x, %add
+  %mul = fmul fast float %sub, %sub
+  %mul2 = fmul fast float %mul, %sub
+  %add2 = fadd fast float %mul2, 1.0
+  %add3 = fadd fast float %mul2, %add2
+  %mul3 = fmul fast float %y, %add3
+  ret float %mul3
+}

From 32c8435ef70031d7bd3dce48e41bdce65747e123 Mon Sep 17 00:00:00 2001
From: Alexander Shaposhnikov
Date: Mon, 28 Sep 2020 03:37:21 -0700
Subject: [PATCH 046/321] [llvm-objcopy][MachO] Add support for universal binaries

This diff adds support for universal binaries to llvm-objcopy.
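
At a high level, each slice of the fat binary is copied independently and the
copied slices are then re-assembled into a new universal output. A condensed,
hypothetical sketch of that per-slice loop (the helper name copySlices and the
elided steps are illustrative only; the real implementation in MachOObjcopy.cpp
below also handles archive slices and rejects unsupported ones such as bitcode):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Object/MachOUniversal.h"
  #include "llvm/Object/MachOUniversalWriter.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/MemoryBuffer.h"
  using namespace llvm;
  using namespace llvm::object;

  static Error copySlices(const MachOUniversalBinary &In) {
    SmallVector<std::unique_ptr<MachOObjectFile>, 2> Owners;
    SmallVector<Slice, 2> Slices;
    for (const auto &O : In.objects()) {
      Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
      if (!ObjOrErr)
        return ObjOrErr.takeError(); // not a Mach-O slice (e.g. bitcode)
      Owners.push_back(std::move(*ObjOrErr)); // keep the slice alive
      // ... run the regular per-object Mach-O objcopy pipeline here ...
      Slices.emplace_back(*Owners.back(), O.getAlign());
    }
    // Re-assemble the (copied) slices into one universal output buffer.
    Expected<std::unique_ptr<MemoryBuffer>> FatOrErr =
        writeUniversalBinaryToBuffer(Slices);
    if (!FatOrErr)
      return FatOrErr.takeError();
    // ... write *FatOrErr to the requested output file ...
    return Error::success();
  }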
Test plan: make check-all

Differential revision: https://reviews.llvm.org/D88400

---
 .../llvm/Object/MachOUniversalWriter.h        |  6 ++
 llvm/lib/Object/MachOUniversalWriter.cpp      |  5 ++
 .../tools/llvm-objcopy/MachO/strip-all.test   |  5 ++
 .../llvm-objcopy/MachO/universal-object.test  | 42 +++++++++++
 .../tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 72 +++++++++++++++++++
 llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h  |  4 ++
 llvm/tools/llvm-objcopy/llvm-objcopy.cpp      | 26 ++++++-
 llvm/tools/llvm-objcopy/llvm-objcopy.h        | 32 +++++++++
 8 files changed, 189 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/tools/llvm-objcopy/MachO/universal-object.test
 create mode 100644 llvm/tools/llvm-objcopy/llvm-objcopy.h

diff --git a/llvm/include/llvm/Object/MachOUniversalWriter.h b/llvm/include/llvm/Object/MachOUniversalWriter.h
index 49352440dca17..606db94c9f202 100644
--- a/llvm/include/llvm/Object/MachOUniversalWriter.h
+++ b/llvm/include/llvm/Object/MachOUniversalWriter.h
@@ -43,6 +43,12 @@ class Slice {
 
   Slice(const MachOObjectFile &O, uint32_t Align);
 
+  /// This constructor takes the prespecified \p CPUType, \p CPUSubType,
+  /// \p ArchName and \p Align instead of inferring them from the archive
+  /// members.
+  Slice(const Archive &A, uint32_t CPUType, uint32_t CPUSubType,
+        std::string ArchName, uint32_t Align);
+
   static Expected<Slice> create(const Archive &A,
                                 LLVMContext *LLVMCtx = nullptr);
 
diff --git a/llvm/lib/Object/MachOUniversalWriter.cpp b/llvm/lib/Object/MachOUniversalWriter.cpp
index 165964e077ce3..4bb467e56a6f9 100644
--- a/llvm/lib/Object/MachOUniversalWriter.cpp
+++ b/llvm/lib/Object/MachOUniversalWriter.cpp
@@ -75,6 +75,11 @@ static uint32_t calculateAlignment(const MachOObjectFile &ObjectFile) {
   }
 }
 
+Slice::Slice(const Archive &A, uint32_t CPUType, uint32_t CPUSubType,
+             std::string ArchName, uint32_t Align)
+    : B(&A), CPUType(CPUType), CPUSubType(CPUSubType),
+      ArchName(std::move(ArchName)), P2Alignment(Align) {}
+
 Slice::Slice(const MachOObjectFile &O, uint32_t Align)
     : B(&O), CPUType(O.getHeader().cputype),
       CPUSubType(O.getHeader().cpusubtype),
diff --git a/llvm/test/tools/llvm-objcopy/MachO/strip-all.test b/llvm/test/tools/llvm-objcopy/MachO/strip-all.test
index 4ff31f5c1e422..cb41b353ec53c 100644
--- a/llvm/test/tools/llvm-objcopy/MachO/strip-all.test
+++ b/llvm/test/tools/llvm-objcopy/MachO/strip-all.test
@@ -27,6 +27,11 @@
 # RUN: cmp %t4 %t.dwarf.stripped
 # RUN: cmp %t5 %t.dwarf.stripped
 
+# RUN: llvm-lipo %t.dwarf -create -output %t.dwarf.universal
+# RUN: llvm-strip %t.dwarf.universal -o %t.dwarf.universal.stripped
+# RUN: llvm-lipo %t.dwarf.universal.stripped -thin x86_64 -output %t6
+# RUN: cmp %t6 %t.dwarf.stripped
+
 ## Make sure that debug sections are removed.
 # DWARF: Sections [
 # DWARF-NOT: Name: __debug_str
diff --git a/llvm/test/tools/llvm-objcopy/MachO/universal-object.test b/llvm/test/tools/llvm-objcopy/MachO/universal-object.test
new file mode 100644
index 0000000000000..a6146fd56483a
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/MachO/universal-object.test
@@ -0,0 +1,42 @@
+# This test verifies that llvm-objcopy copies a universal Mach-O object file properly.
+
+# RUN: yaml2obj %p/Inputs/i386.yaml -o %t.i386
+# RUN: yaml2obj %p/Inputs/x86_64.yaml -o %t.x86_64
+
+## Case 1: copy a universal object containing regular Mach-O objects.
+# RUN: llvm-lipo %t.i386 %t.x86_64 -create -output %t.universal
+# RUN: llvm-objcopy %t.universal %t.universal.copy
+# RUN: llvm-lipo %t.universal.copy -archs | FileCheck --check-prefix=VERIFY_ARCHS %s
+# RUN: llvm-lipo %t.universal.copy -thin i386 -output %t.i386.copy
+# RUN: llvm-lipo %t.universal.copy -thin x86_64 -output %t.x86_64.copy
+# RUN: cmp %t.i386 %t.i386.copy
+# RUN: cmp %t.x86_64 %t.x86_64.copy
+
+## Case 2: copy a universal object file containing an archive.
+# RUN: rm -f %t.archive.i386
+# RUN: llvm-ar cr %t.archive.i386 %t.i386
+# RUN: llvm-lipo %t.archive.i386 %t.x86_64 -create -output %t.universal.containing.archive
+# RUN: llvm-objcopy %t.universal.containing.archive %t.universal.containing.archive.copy
+# RUN: llvm-lipo %t.universal.containing.archive.copy -archs | FileCheck --check-prefix=VERIFY_ARCHS %s
+# RUN: llvm-lipo %t.universal.containing.archive.copy -thin i386 -output %t.archive.i386.copy
+# RUN: llvm-lipo %t.universal.containing.archive.copy -thin x86_64 -output %t.archive.x86_64.copy
+# RUN: cmp %t.archive.i386 %t.archive.i386.copy
+# RUN: cmp %t.x86_64 %t.archive.x86_64.copy
+
+## Case 3: copy an archive containing a universal object.
+# RUN: llvm-ar cr %t.archive.containing.universal %t.universal
+# RUN: llvm-objcopy %t.archive.containing.universal %t.archive.containing.universal.copy
+
+## Case 4: try to copy a universal object file containing a bitcode slice.
+# RUN: echo 'target triple = "arm64-apple-ios8.0.0"' | llvm-as -o %t.bitcode
+# RUN: llvm-lipo %t.bitcode %t.x86_64 -create -output %t.universal.containing.bitcode
+# RUN: not llvm-objcopy %t.universal.containing.bitcode %t.universal.containing.bitcode.copy 2>&1 \
+# RUN: | FileCheck --check-prefix=UNSUPPORTED_UNIVERSAL_OBJECT %s
+
+## Case 5: try to copy an archive containing an unsupported universal object.
+# RUN: llvm-ar cr %t.archive.universal.bitcode %t.universal.containing.bitcode
+# RUN: not llvm-objcopy %t.archive.universal.bitcode %t.archive.universal.bitcode.copy 2>&1 \
+# RUN: | FileCheck --check-prefix=UNSUPPORTED_UNIVERSAL_OBJECT %s
+
+# VERIFY_ARCHS: i386 x86_64
+# UNSUPPORTED_UNIVERSAL_OBJECT: slice for 'arm64' of the universal Mach-O binary {{.*}} is not a Mach-O object or an archive
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 47a08d33002af..28b4ec655a2e1 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -8,9 +8,13 @@
 
 #include "MachOObjcopy.h"
 #include "../CopyConfig.h"
+#include "../llvm-objcopy.h"
 #include "MachOReader.h"
 #include "MachOWriter.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/MachOUniversalWriter.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 
@@ -386,6 +390,74 @@ Error executeObjcopyOnBinary(const CopyConfig &Config,
   return Writer.write();
 }
 
+Error executeObjcopyOnMachOUniversalBinary(CopyConfig &Config,
+                                           const MachOUniversalBinary &In,
+                                           Buffer &Out) {
+  SmallVector<OwningBinary<Binary>, 2> Binaries;
+  SmallVector<Slice, 2> Slices;
+  for (const auto &O : In.objects()) {
+    Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
+    if (ArOrErr) {
+      Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
+          createNewArchiveMembers(Config, **ArOrErr);
+      if (!NewArchiveMembersOrErr)
+        return NewArchiveMembersOrErr.takeError();
+      Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
+          writeArchiveToBuffer(*NewArchiveMembersOrErr,
+                               (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(),
+                               Config.DeterministicArchives,
+                               (*ArOrErr)->isThin());
+      if (!OutputBufferOrErr)
+        return OutputBufferOrErr.takeError();
+      Expected<std::unique_ptr<Binary>> BinaryOrErr =
+          object::createBinary(**OutputBufferOrErr);
+      if (!BinaryOrErr)
+        return BinaryOrErr.takeError();
+      Binaries.emplace_back(std::move(*BinaryOrErr),
+                            std::move(*OutputBufferOrErr));
+      Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
+                          O.getCPUType(), O.getCPUSubType(),
+                          O.getArchFlagName(), O.getAlign());
+      continue;
+    }
+    // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
+    // ObjectForArch return an Error in case of a type mismatch. We need to
+    // check each in turn to see what kind of slice this is, so ignore errors
+    // produced along the way.
+    consumeError(ArOrErr.takeError());
+
+    Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
+    if (!ObjOrErr) {
+      consumeError(ObjOrErr.takeError());
+      return createStringError(std::errc::invalid_argument,
+                               "slice for '%s' of the universal Mach-O binary "
+                               "'%s' is not a Mach-O object or an archive",
+                               O.getArchFlagName().c_str(),
+                               Config.InputFilename.str().c_str());
+    }
+    MemBuffer MB(O.getArchFlagName());
+    if (Error E = executeObjcopyOnBinary(Config, **ObjOrErr, MB))
+      return E;
+    std::unique_ptr<MemoryBuffer> OutputBuffer =
+        MB.releaseMemoryBuffer();
+    Expected<std::unique_ptr<Binary>> BinaryOrErr =
+        object::createBinary(*OutputBuffer);
+    if (!BinaryOrErr)
+      return BinaryOrErr.takeError();
+    Binaries.emplace_back(std::move(*BinaryOrErr), std::move(OutputBuffer));
+    Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
+                        O.getAlign());
+  }
+  Expected<std::unique_ptr<MemoryBuffer>> B =
+      writeUniversalBinaryToBuffer(Slices);
+  if (!B)
+    return B.takeError();
+  if (Error E = Out.allocate((*B)->getBufferSize()))
+    return E;
+  memcpy(Out.getBufferStart(), (*B)->getBufferStart(), (*B)->getBufferSize());
+  return Out.commit();
+}
+
 } // end namespace macho
 } // end namespace objcopy
 } // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
index f34e361db7ea9..c3f5391f79b6a 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
@@ -24,6 +24,10 @@ class Buffer;
 namespace macho {
 Error executeObjcopyOnBinary(const CopyConfig &Config,
                              object::MachOObjectFile &In, Buffer &Out);
+
+Error executeObjcopyOnMachOUniversalBinary(
+    CopyConfig &Config, const object::MachOUniversalBinary &In, Buffer &Out);
+
 } // end namespace macho
 } // end namespace objcopy
 } // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index 175f2929eb230..8cd58572f5a19 100644
--- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Object/ELFTypes.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/Wasm.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
@@ -144,6 +145,10 @@ static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In,
     return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out);
   else if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In))
     return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out);
+  else if (auto *MachOUniversalBinary =
+               dyn_cast<object::MachOUniversalBinary>(&In))
+    return macho::executeObjcopyOnMachOUniversalBinary(
+        Config, *MachOUniversalBinary, Out);
   else if (auto *WasmBinary = dyn_cast<object::WasmObjectFile>(&In))
     return objcopy::wasm::executeObjcopyOnBinary(Config, *WasmBinary, Out);
   else
@@ -151,7 +156,11 @@ static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In,
                              "unsupported object file format");
 }
 
-static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
+namespace llvm {
+namespace objcopy {
+
+Expected<std::vector<NewArchiveMember>>
+createNewArchiveMembers(CopyConfig &Config, const Archive &Ar) {
   std::vector<NewArchiveMember> NewArchiveMembers;
   Error Err = Error::success();
   for (const Archive::Child &Child : Ar.children(Err)) {
@@ -166,7 +175,7 @@ static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
     MemBuffer MB(ChildNameOrErr.get());
     if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MB))
-      return E;
+      return std::move(E);
 
     Expected<NewArchiveMember> Member =
         NewArchiveMember::getOldMember(Child, Config.DeterministicArchives);
@@ -178,8 +187,19 @@ static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
   }
   if (Err)
     return createFileError(Config.InputFilename, std::move(Err));
+  return NewArchiveMembers;
+}
+
+} // end namespace objcopy
+} // end namespace llvm
 
-  return deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
+static Error executeObjcopyOnArchive(CopyConfig &Config,
+                                     const object::Archive &Ar) {
+  Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
+      createNewArchiveMembers(Config, Ar);
+  if (!NewArchiveMembersOrErr)
+    return NewArchiveMembersOrErr.takeError();
+  return deepWriteArchive(Config.OutputFilename, *NewArchiveMembersOrErr,
                           Ar.hasSymbolTable(), Ar.kind(),
                           Config.DeterministicArchives, Ar.isThin());
 }
diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.h b/llvm/tools/llvm-objcopy/llvm-objcopy.h
new file mode 100644
index 0000000000000..97a166769f954
--- /dev/null
+++ b/llvm/tools/llvm-objcopy/llvm-objcopy.h
@@ -0,0 +1,32 @@
+//===- llvm-objcopy.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_OBJCOPY_H
+#define LLVM_TOOLS_OBJCOPY_OBJCOPY_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+struct NewArchiveMember;
+
+namespace object {
+
+class Archive;
+
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+Expected<std::vector<NewArchiveMember>>
+createNewArchiveMembers(CopyConfig &Config, const object::Archive &Ar);
+
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_OBJCOPY_H

From c9f5cdd4531e2836f47ceb28b80fd7f418645d96 Mon Sep 17 00:00:00 2001
From: Amara Emerson
Date: Mon, 5 Oct 2020 10:52:43 -0700
Subject: [PATCH 047/321] Revert "[ARM]Fold select_cc(vecreduce_[u|s][min|max], x) into VMINV or VMAXV"

This reverts commit 2573cf3c3d42c943cb91b6e85b803f7671260185.

These seem to break some lit tests.
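
For reference, the reverted combine matched selects of the form
select_cc(x, vecreduce_umin(v), setult) (plus the smin/umax/smax variants) and
lowered them to a single VMINV/VMAXV, which seeds the reduction with the scalar
operand. A scalar reference model of the u8 case, as exercised by the deleted
mve-vmaxv-vminv-scalar.ll test below (the function name here is illustrative
only):

  #include <algorithm>
  #include <cstdint>

  // What vminv.u8 computes: min(x, umin-reduce(v)), i.e. the reduction
  // seeded with the scalar operand of the select.
  uint8_t vminv_u8_model(uint8_t x, const uint8_t v[16]) {
    uint8_t m = x;
    for (int i = 0; i < 16; ++i)
      m = std::min(m, v[i]);
    return m;
  }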
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp       | 113 ---
 llvm/lib/Target/ARM/ARMISelLowering.h         |   4 -
 llvm/lib/Target/ARM/ARMInstrMVE.td            |  34 -
 .../CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll  | 647 ------------------
 4 files changed, 798 deletions(-)
 delete mode 100644 llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a818b66dd96fa..798ecf2487637 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -987,8 +987,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setTargetDAGCombine(ISD::SMAX);
     setTargetDAGCombine(ISD::UMAX);
     setTargetDAGCombine(ISD::FP_EXTEND);
-    setTargetDAGCombine(ISD::SELECT);
-    setTargetDAGCombine(ISD::SELECT_CC);
   }
 
   if (!Subtarget->hasFP64()) {
@@ -1742,10 +1740,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";
   case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";
   case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";
-  case ARMISD::VMINVu: return "ARMISD::VMINVu";
-  case ARMISD::VMINVs: return "ARMISD::VMINVs";
-  case ARMISD::VMAXVu: return "ARMISD::VMAXVu";
-  case ARMISD::VMAXVs: return "ARMISD::VMAXVs";
   case ARMISD::UMAAL: return "ARMISD::UMAAL";
   case ARMISD::UMLAL: return "ARMISD::UMLAL";
   case ARMISD::SMLAL: return "ARMISD::SMLAL";
@@ -12099,111 +12093,6 @@ static SDValue PerformAddeSubeCombine(SDNode *N,
   return SDValue();
 }
 
-static SDValue PerformSELECTCombine(SDNode *N,
-                                    TargetLowering::DAGCombinerInfo &DCI,
-                                    const ARMSubtarget *Subtarget) {
-  if (!Subtarget->hasMVEIntegerOps())
-    return SDValue();
-
-  SDLoc dl(N);
-  SDValue SetCC;
-  SDValue LHS;
-  SDValue RHS;
-  ISD::CondCode CC;
-  SDValue TrueVal;
-  SDValue FalseVal;
-
-  if (N->getOpcode() == ISD::SELECT &&
-      N->getOperand(0)->getOpcode() == ISD::SETCC) {
-    SetCC = N->getOperand(0);
-    LHS = SetCC->getOperand(0);
-    RHS = SetCC->getOperand(1);
-    CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
-    TrueVal = N->getOperand(1);
-    FalseVal = N->getOperand(2);
-  } else if (N->getOpcode() == ISD::SELECT_CC) {
-    LHS = N->getOperand(0);
-    RHS = N->getOperand(1);
-    CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-    TrueVal = N->getOperand(2);
-    FalseVal = N->getOperand(3);
-  } else {
-    return SDValue();
-  }
-
-  unsigned int Opcode = 0;
-  if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
-       FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
-      (CC == ISD::SETULT || CC == ISD::SETUGT)) {
-    Opcode = ARMISD::VMINVu;
-    if (CC == ISD::SETUGT)
-      std::swap(TrueVal, FalseVal);
-  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
-              FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
-             (CC == ISD::SETLT || CC == ISD::SETGT)) {
-    Opcode = ARMISD::VMINVs;
-    if (CC == ISD::SETGT)
-      std::swap(TrueVal, FalseVal);
-  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
-              FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
-             (CC == ISD::SETUGT || CC == ISD::SETULT)) {
-    Opcode = ARMISD::VMAXVu;
-    if (CC == ISD::SETULT)
-      std::swap(TrueVal, FalseVal);
-  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
-              FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
-             (CC == ISD::SETGT || CC == ISD::SETLT)) {
-    Opcode = ARMISD::VMAXVs;
-    if (CC == ISD::SETLT)
-      std::swap(TrueVal, FalseVal);
-  } else
-    return SDValue();
-
-  // Normalise to the right hand side being the vector reduction
-  switch (TrueVal->getOpcode()) {
-  case ISD::VECREDUCE_UMIN:
-  case ISD::VECREDUCE_SMIN:
-  case ISD::VECREDUCE_UMAX:
-  case ISD::VECREDUCE_SMAX:
std::swap(LHS, RHS); - std::swap(TrueVal, FalseVal); - break; - } - - EVT VectorType = FalseVal->getOperand(0).getValueType(); - - if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 && - VectorType != MVT::v4i32) - return SDValue(); - - EVT VectorScalarType = VectorType.getVectorElementType(); - - // The values being selected must also be the ones being compared - if (TrueVal != LHS || FalseVal != RHS) - return SDValue(); - - EVT LeftType = LHS->getValueType(0); - EVT RightType = RHS->getValueType(0); - - // The types must match the reduced type too - if (LeftType != VectorScalarType || RightType != VectorScalarType) - return SDValue(); - - // Legalise the scalar to an i32 - if (VectorScalarType != MVT::i32) - LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); - - // Generate the reduction as an i32 for legalisation purposes - auto Reduction = - DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0)); - - // The result isn't actually an i32 so truncate it back to its original type - if (VectorScalarType != MVT::i32) - Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction); - - return Reduction; -} - static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -16160,8 +16049,6 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; - case ISD::SELECT_CC: - case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget); case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget); case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget); case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 90cbf1eea0481..f5bb097062aff 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -241,10 +241,6 @@ class VectorType; VMLALVAu, // provided as low and high halves VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask VMLALVApu, - VMINVu, // Find minimum unsigned value of a vector and register - VMINVs, // Find minimum signed value of a vector and register - VMAXVu, // Find maximum unsigned value of a vector and register - VMAXVs, // Find maximum signed value of a vector and register SMULWB, // Signed multiply word by half word, bottom SMULWT, // Signed multiply word by half word, top diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index f7f403503dc7f..d9e9cf1176fc7 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -944,14 +944,6 @@ multiclass MVE_VMINMAXV_ty { defm u32: MVE_VMINMAXV_p; } -def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer - SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2> -]>; -def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>; -def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>; -def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>; -def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>; - defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">; defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">; @@ -982,32 +974,6 @@ let Predicates = [HasMVEInt] in { def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))), (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>; - def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))), - (i32 (MVE_VMINVu8 $x, $src))>; - def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), 
(v8i16 MQPR:$src))), - (i32 (MVE_VMINVu16 $x, $src))>; - def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))), - (i32 (MVE_VMINVu32 $x, $src))>; - def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))), - (i32 (MVE_VMINVs8 $x, $src))>; - def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))), - (i32 (MVE_VMINVs16 $x, $src))>; - def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))), - (i32 (MVE_VMINVs32 $x, $src))>; - - def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))), - (i32 (MVE_VMAXVu8 $x, $src))>; - def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))), - (i32 (MVE_VMAXVu16 $x, $src))>; - def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))), - (i32 (MVE_VMAXVu32 $x, $src))>; - def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))), - (i32 (MVE_VMAXVs8 $x, $src))>; - def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))), - (i32 (MVE_VMAXVs16 $x, $src))>; - def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))), - (i32 (MVE_VMAXVs32 $x, $src))>; - } multiclass MVE_VMINMAXAV_ty { diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll deleted file mode 100644 index 13b831efabc57..0000000000000 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll +++ /dev/null @@ -1,647 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s - -define arm_aapcs_vfpcc zeroext i8 @uminv16i8(<16 x i8> %vec, i8 zeroext %min) { -; CHECK-LABEL: uminv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.u8 r0, q0 -; CHECK-NEXT: uxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) - %cmp = icmp ult i8 %x, %min - %1 = select i1 %cmp, i8 %x, i8 %min - ret i8 %1 -} - -define arm_aapcs_vfpcc zeroext i16 @uminv8i16(<8 x i16> %vec, i16 zeroext %min) { -; CHECK-LABEL: uminv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.u16 r0, q0 -; CHECK-NEXT: uxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) - %cmp = icmp ult i16 %x, %min - %1 = select i1 %cmp, i16 %x, i16 %min - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @uminv4i32(<4 x i32> %vec, i32 %min) { -; CHECK-LABEL: uminv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.u32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) - %cmp = icmp ult i32 %x, %min - %1 = select i1 %cmp, i32 %x, i32 %min - ret i32 %1 -} - -define arm_aapcs_vfpcc signext i8 @sminv16i8(<16 x i8> %vec, i8 signext %min) { -; CHECK-LABEL: sminv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.s8 r0, q0 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) - %cmp = icmp slt i8 %x, %min - %1 = select i1 %cmp, i8 %x, i8 %min - ret i8 %1 -} - -define arm_aapcs_vfpcc signext i16 @sminv8i16(<8 x i16> %vec, i16 signext %min) { -; CHECK-LABEL: sminv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.s16 r0, q0 -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) - %cmp = icmp slt i16 %x, %min - %1 = select i1 %cmp, i16 %x, i16 %min - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @sminv4i32(<4 x i32> %vec, i32 %min) { -; CHECK-LABEL: sminv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.s32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> 
%vec) - %cmp = icmp slt i32 %x, %min - %1 = select i1 %cmp, i32 %x, i32 %min - ret i32 %1 -} - -define arm_aapcs_vfpcc zeroext i8 @umaxv16i8(<16 x i8> %vec, i8 zeroext %max) { -; CHECK-LABEL: umaxv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.u8 r0, q0 -; CHECK-NEXT: uxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) - %cmp = icmp ugt i8 %x, %max - %1 = select i1 %cmp, i8 %x, i8 %max - ret i8 %1 -} - -define arm_aapcs_vfpcc zeroext i16 @umaxv8i16(<8 x i16> %vec, i16 zeroext %max) { -; CHECK-LABEL: umaxv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.u16 r0, q0 -; CHECK-NEXT: uxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) - %cmp = icmp ugt i16 %x, %max - %1 = select i1 %cmp, i16 %x, i16 %max - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @umaxv4i32(<4 x i32> %vec, i32 %max) { -; CHECK-LABEL: umaxv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.u32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) - %cmp = icmp ugt i32 %x, %max - %1 = select i1 %cmp, i32 %x, i32 %max - ret i32 %1 -} - -define arm_aapcs_vfpcc signext i8 @smaxv16i8(<16 x i8> %vec, i8 signext %max) { -; CHECK-LABEL: smaxv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.s8 r0, q0 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) - %cmp = icmp sgt i8 %x, %max - %1 = select i1 %cmp, i8 %x, i8 %max - ret i8 %1 -} - -define arm_aapcs_vfpcc signext i16 @smaxv8i16(<8 x i16> %vec, i16 signext %max) { -; CHECK-LABEL: smaxv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.s16 r0, q0 -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) - %cmp = icmp sgt i16 %x, %max - %1 = select i1 %cmp, i16 %x, i16 %max - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @smaxv4i32(<4 x i32> %vec, i32 %max) { -; CHECK-LABEL: smaxv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.s32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) - %cmp = icmp sgt i32 %x, %max - %1 = select i1 %cmp, i32 %x, i32 %max - ret i32 %1 -} - -define arm_aapcs_vfpcc zeroext i8 @commute_uminv16i8(<16 x i8> %vec, i8 zeroext %min) { -; CHECK-LABEL: commute_uminv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.u8 r0, q0 -; CHECK-NEXT: uxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) - %cmp = icmp ult i8 %min, %x - %1 = select i1 %cmp, i8 %min, i8 %x - ret i8 %1 -} - -define arm_aapcs_vfpcc zeroext i16 @commute_uminv8i16(<8 x i16> %vec, i16 zeroext %min) { -; CHECK-LABEL: commute_uminv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.u16 r0, q0 -; CHECK-NEXT: uxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) - %cmp = icmp ult i16 %min, %x - %1 = select i1 %cmp, i16 %min, i16 %x - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @commute_uminv4i32(<4 x i32> %vec, i32 %min) { -; CHECK-LABEL: commute_uminv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.u32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) - %cmp = icmp ult i32 %min, %x - %1 = select i1 %cmp, i32 %min, i32 %x - ret i32 %1 -} - -define arm_aapcs_vfpcc signext i8 @commute_sminv16i8(<16 x i8> %vec, i8 signext %min) { -; CHECK-LABEL: commute_sminv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.s8 r0, q0 -; CHECK-NEXT: sxtb r0, 
r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) - %cmp = icmp slt i8 %min, %x - %1 = select i1 %cmp, i8 %min, i8 %x - ret i8 %1 -} - -define arm_aapcs_vfpcc signext i16 @commute_sminv8i16(<8 x i16> %vec, i16 signext %min) { -; CHECK-LABEL: commute_sminv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.s16 r0, q0 -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) - %cmp = icmp slt i16 %min, %x - %1 = select i1 %cmp, i16 %min, i16 %x - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @commute_sminv4i32(<4 x i32> %vec, i32 %min) { -; CHECK-LABEL: commute_sminv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.s32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) - %cmp = icmp slt i32 %min, %x - %1 = select i1 %cmp, i32 %min, i32 %x - ret i32 %1 -} - -define arm_aapcs_vfpcc zeroext i8 @commute_umaxv16i8(<16 x i8> %vec, i8 zeroext %max) { -; CHECK-LABEL: commute_umaxv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.u8 r0, q0 -; CHECK-NEXT: uxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) - %cmp = icmp ugt i8 %max, %x - %1 = select i1 %cmp, i8 %max, i8 %x - ret i8 %1 -} - -define arm_aapcs_vfpcc zeroext i16 @commute_umaxv8i16(<8 x i16> %vec, i16 zeroext %max) { -; CHECK-LABEL: commute_umaxv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.u16 r0, q0 -; CHECK-NEXT: uxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) - %cmp = icmp ugt i16 %max, %x - %1 = select i1 %cmp, i16 %max, i16 %x - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @commute_umaxv4i32(<4 x i32> %vec, i32 %max) { -; CHECK-LABEL: commute_umaxv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.u32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) - %cmp = icmp ugt i32 %max, %x - %1 = select i1 %cmp, i32 %max, i32 %x - ret i32 %1 -} - -define arm_aapcs_vfpcc signext i8 @commute_smaxv16i8(<16 x i8> %vec, i8 signext %max) { -; CHECK-LABEL: commute_smaxv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.s8 r0, q0 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) - %cmp = icmp sgt i8 %max, %x - %1 = select i1 %cmp, i8 %max, i8 %x - ret i8 %1 -} - -define arm_aapcs_vfpcc signext i16 @commute_smaxv8i16(<8 x i16> %vec, i16 signext %max) { -; CHECK-LABEL: commute_smaxv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.s16 r0, q0 -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) - %cmp = icmp sgt i16 %max, %x - %1 = select i1 %cmp, i16 %max, i16 %x - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @commute_smaxv4i32(<4 x i32> %vec, i32 %max) { -; CHECK-LABEL: commute_smaxv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.s32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) - %cmp = icmp sgt i32 %max, %x - %1 = select i1 %cmp, i32 %max, i32 %x - ret i32 %1 -} - -define arm_aapcs_vfpcc signext i8 @mismatch_smaxv16i8(<16 x i8> %vec, i8 signext %max) { -; CHECK-LABEL: mismatch_smaxv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: mvn r1, #127 -; CHECK-NEXT: vmaxv.s8 r1, q0 -; CHECK-NEXT: sxtb r2, r1 -; CHECK-NEXT: cmp r2, r0 -; CHECK-NEXT: csel r0, r0, r1, gt -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 
@llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) - %cmp = icmp sgt i8 %x, %max - %1 = select i1 %cmp, i8 %max, i8 %x - ret i8 %1 -} - -define arm_aapcs_vfpcc signext i8 @mismatch2_smaxv16i8(<16 x i8> %vec, i8 signext %max) { -; CHECK-LABEL: mismatch2_smaxv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: mvn r1, #127 -; CHECK-NEXT: vmaxv.s8 r1, q0 -; CHECK-NEXT: sxtb r2, r1 -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r1, r0, gt -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) - %cmp = icmp sgt i8 %max, %x - %1 = select i1 %cmp, i8 %x, i8 %max - ret i8 %1 -} - -define arm_aapcs_vfpcc zeroext i8 @inverted_uminv16i8(<16 x i8> %vec, i8 zeroext %min) { -; CHECK-LABEL: inverted_uminv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.u8 r0, q0 -; CHECK-NEXT: uxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) - %cmp = icmp ugt i8 %x, %min - %1 = select i1 %cmp, i8 %min, i8 %x - ret i8 %1 -} - -define arm_aapcs_vfpcc zeroext i16 @inverted_uminv8i16(<8 x i16> %vec, i16 zeroext %min) { -; CHECK-LABEL: inverted_uminv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.u16 r0, q0 -; CHECK-NEXT: uxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) - %cmp = icmp ugt i16 %x, %min - %1 = select i1 %cmp, i16 %min, i16 %x - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @inverted_uminv4i32(<4 x i32> %vec, i32 %min) { -; CHECK-LABEL: inverted_uminv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.u32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) - %cmp = icmp ugt i32 %x, %min - %1 = select i1 %cmp, i32 %min, i32 %x - ret i32 %1 -} - -define arm_aapcs_vfpcc signext i8 @inverted_sminv16i8(<16 x i8> %vec, i8 signext %min) { -; CHECK-LABEL: inverted_sminv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.s8 r0, q0 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) - %cmp = icmp sgt i8 %x, %min - %1 = select i1 %cmp, i8 %min, i8 %x - ret i8 %1 -} - -define arm_aapcs_vfpcc signext i16 @inverted_sminv8i16(<8 x i16> %vec, i16 signext %min) { -; CHECK-LABEL: inverted_sminv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.s16 r0, q0 -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) - %cmp = icmp sgt i16 %x, %min - %1 = select i1 %cmp, i16 %min, i16 %x - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @inverted_sminv4i32(<4 x i32> %vec, i32 %min) { -; CHECK-LABEL: inverted_sminv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vminv.s32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) - %cmp = icmp sgt i32 %x, %min - %1 = select i1 %cmp, i32 %min, i32 %x - ret i32 %1 -} - -define arm_aapcs_vfpcc zeroext i8 @inverted_umaxv16i8(<16 x i8> %vec, i8 zeroext %max) { -; CHECK-LABEL: inverted_umaxv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.u8 r0, q0 -; CHECK-NEXT: uxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) - %cmp = icmp ult i8 %x, %max - %1 = select i1 %cmp, i8 %max, i8 %x - ret i8 %1 -} - -define arm_aapcs_vfpcc zeroext i16 @inverted_umaxv8i16(<8 x i16> %vec, i16 zeroext %max) { -; CHECK-LABEL: inverted_umaxv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.u16 r0, q0 -; CHECK-NEXT: uxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 
@llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) - %cmp = icmp ult i16 %x, %max - %1 = select i1 %cmp, i16 %max, i16 %x - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @inverted_umaxv4i32(<4 x i32> %vec, i32 %max) { -; CHECK-LABEL: inverted_umaxv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.u32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) - %cmp = icmp ult i32 %x, %max - %1 = select i1 %cmp, i32 %max, i32 %x - ret i32 %1 -} - -define arm_aapcs_vfpcc signext i8 @inverted_smaxv16i8(<16 x i8> %vec, i8 signext %max) { -; CHECK-LABEL: inverted_smaxv16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.s8 r0, q0 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) - %cmp = icmp slt i8 %x, %max - %1 = select i1 %cmp, i8 %max, i8 %x - ret i8 %1 -} - -define arm_aapcs_vfpcc signext i16 @inverted_smaxv8i16(<8 x i16> %vec, i16 signext %max) { -; CHECK-LABEL: inverted_smaxv8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.s16 r0, q0 -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) - %cmp = icmp slt i16 %x, %max - %1 = select i1 %cmp, i16 %max, i16 %x - ret i16 %1 -} - -define arm_aapcs_vfpcc i32 @inverted_smaxv4i32(<4 x i32> %vec, i32 %max) { -; CHECK-LABEL: inverted_smaxv4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmaxv.s32 r0, q0 -; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) - %cmp = icmp slt i32 %x, %max - %1 = select i1 %cmp, i32 %max, i32 %x - ret i32 %1 -} - -define arm_aapcs_vfpcc signext i16 @trunc_and_sext(<8 x i16> %vec, i32 %max) #1 { -; CHECK-LABEL: trunc_and_sext: -; CHECK: @ %bb.0: -; CHECK-NEXT: movw r1, #32768 -; CHECK-NEXT: movt r1, #65535 -; CHECK-NEXT: vmaxv.s16 r1, q0 -; CHECK-NEXT: sxth r2, r1 -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r0, r1, gt -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) - %xs = sext i16 %x to i32 - %cmp = icmp sgt i32 %max, %xs - %mt = trunc i32 %max to i16 - %1 = select i1 %cmp, i16 %mt, i16 %x - ret i16 %1 -} - -define arm_aapcs_vfpcc signext i16 @trunc_and_zext(<8 x i16> %vec, i32 %max) #1 { -; CHECK-LABEL: trunc_and_zext: -; CHECK: @ %bb.0: -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vmaxv.u16 r1, q0 -; CHECK-NEXT: uxth r2, r1 -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r0, r1, gt -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) - %xs = zext i16 %x to i32 - %cmp = icmp sgt i32 %max, %xs - %mt = trunc i32 %max to i16 - %1 = select i1 %cmp, i16 %mt, i16 %x - ret i16 %1 -} - -define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) { -; CHECK-LABEL: uminv2i64: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov r12, s3 -; CHECK-NEXT: vmov lr, s1 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r4, r2, r3, lo -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r2, r2, r3, lo -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r5, r2, r4, eq -; CHECK-NEXT: csel r3, lr, r12, lo -; CHECK-NEXT: subs r2, r5, r0 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: sbcs.w r2, r3, r1 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r5, r0, ne -; CHECK-NEXT: csel r1, r3, r1, ne -; CHECK-NEXT: pop {r4, r5, r7, pc} - 
%x = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec) - %cmp = icmp ult i64 %x, %min - %1 = select i1 %cmp, i64 %x, i64 %min - ret i64 %1 -} - -define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) { -; CHECK-LABEL: sminv2i64: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov r12, s3 -; CHECK-NEXT: vmov lr, s1 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r4, r2, r3, lt -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r2, r2, r3, lo -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r5, r2, r4, eq -; CHECK-NEXT: csel r3, lr, r12, lt -; CHECK-NEXT: subs r2, r5, r0 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: sbcs.w r2, r3, r1 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r5, r0, ne -; CHECK-NEXT: csel r1, r3, r1, ne -; CHECK-NEXT: pop {r4, r5, r7, pc} - %x = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec) - %cmp = icmp slt i64 %x, %min - %1 = select i1 %cmp, i64 %x, i64 %min - ret i64 %1 -} - -define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) { -; CHECK-LABEL: umaxv2i64: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov r12, s3 -; CHECK-NEXT: vmov lr, s1 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r4, r2, r3, hi -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r2, r2, r3, hi -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r5, r2, r4, eq -; CHECK-NEXT: csel r3, lr, r12, hi -; CHECK-NEXT: subs r2, r0, r5 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: sbcs.w r2, r1, r3 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r5, r0, ne -; CHECK-NEXT: csel r1, r3, r1, ne -; CHECK-NEXT: pop {r4, r5, r7, pc} - %x = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec) - %cmp = icmp ugt i64 %x, %max - %1 = select i1 %cmp, i64 %x, i64 %max - ret i64 %1 -} - -define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) { -; CHECK-LABEL: smaxv2i64: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov r12, s3 -; CHECK-NEXT: vmov lr, s1 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r4, r2, r3, gt -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r2, r2, r3, hi -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r5, r2, r4, eq -; CHECK-NEXT: csel r3, lr, r12, gt -; CHECK-NEXT: subs r2, r0, r5 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: sbcs.w r2, r1, r3 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r5, r0, ne -; CHECK-NEXT: csel r1, r3, r1, ne -; CHECK-NEXT: pop {r4, r5, r7, pc} - %x = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec) - %cmp = icmp sgt i64 %x, %max - %1 = select i1 %cmp, i64 %x, i64 %max - ret i64 %1 -} - -declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) - -declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) - -declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) - -declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>) - -declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) - -declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) - -declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) - -declare i64 
@llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>) - -declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>) - -declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>) - -declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) - -declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>) - -declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>) - -declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>) - -declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) - -declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>) From 8fb4645321765124aa89c8e798a989eae38739ce Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 5 Oct 2020 18:51:12 +0100 Subject: [PATCH 048/321] [InstCombine] FoldShiftByConstant - use m_Specific. NFCI. Use m_Specific instead of m_Value followed by an equality check - we already do this for the similar folds above; it looks like an oversight in rG2b459fe7e1e, where the original pattern match code looked a little different. --- .../lib/Transforms/InstCombine/InstCombineShifts.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 6a4ecd21ea47f..b02e4eaa3ceef 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -730,7 +730,7 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, if (Op0->hasOneUse()) { if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) - Value *V1, *V2; + Value *V1; ConstantInt *CC; switch (Op0BO->getOpcode()) { default: break; @@ -795,14 +795,13 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && match(Op0BO->getOperand(0), - m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))), - m_ConstantInt(CC))) && V2 == Op1) { + m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), + m_ConstantInt(CC)))) { Value *YS = // (Y << C) - Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // X & (CC << C) Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1), - V1->getName()+".mask"); - + V1->getName() + ".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } @@ -810,7 +809,6 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, } } - // If the operand is a bitwise operator with a constant RHS, and the // shift is the only use, we can pull it out of the shift. const APInt *Op0C; From c2bce848ecae0e17883e7cc4b8054c1e12ef1f98 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Tue, 29 Sep 2020 12:22:30 -0700 Subject: [PATCH 049/321] [GlobalISel] Fix CSEMIRBuilder silently allowing use-before-def. If a CSEMIRBuilder query hits the instruction at the current insert point, move the insert point ahead one so that subsequent uses of the builder don't end up with uses before defs. This fix also shows that AMDGPU was affected by this bug fairly often, but got away with it because a G_IMPLICIT_DEF happened to come before the use.
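A rough sketch of the failure mode (illustrative only, not the unit test this patch adds to CSETest.cpp; it assumes a CSEMIRBuilder B already wired to a GISelCSEInfo whose config CSEs G_IMPLICIT_DEF, and the variable names are hypothetical):

  LLT S32 = LLT::scalar(32);
  // First request: a fresh G_IMPLICIT_DEF is emitted at the insert point
  // and recorded in the CSE map.
  auto Def = B.buildUndef(S32);
  // Rewind the insert point so it sits *at* the existing def, as happens
  // when a pass revisits an instruction and rewrites it in place.
  B.setInsertPt(B.getMBB(), Def->getIterator());
  // Second request: the CSE query hits the instruction at the insert
  // point and hands back Def itself.
  auto Def2 = B.buildUndef(S32);
  // Before this patch the insert point still preceded Def, so this G_ADD
  // was emitted ahead of the def it uses; the fix first advances the
  // insert point past Def.
  B.buildAdd(S32, Def2, Def2);

Bumping the insert point (rather than splicing) is enough here because the CSE hit is already in the right place; the existing splice path still handles hits that don't dominate the insert point.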
Differential Revision: https://reviews.llvm.org/D88605 --- llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 8 +- .../GlobalISel/legalize-implicit-def.mir | 4 +- .../GlobalISel/legalize-load-constant.mir | 60 +++---- .../AMDGPU/GlobalISel/legalize-load-flat.mir | 60 +++---- .../GlobalISel/legalize-load-global.mir | 160 +++++++++--------- .../AMDGPU/GlobalISel/legalize-load-local.mir | 64 +++---- .../GlobalISel/legalize-load-private.mir | 48 +++--- llvm/unittests/CodeGen/GlobalISel/CSETest.cpp | 27 +++ 8 files changed, 232 insertions(+), 199 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 5441357e5fbe4..1a3b814075f80 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -42,8 +42,14 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID, if (MI) { CSEInfo->countOpcodeHit(MI->getOpcode()); auto CurrPos = getInsertPt(); - if (!dominates(MI, CurrPos)) + auto MII = MachineBasicBlock::iterator(MI); + if (MII == CurrPos) { + // Move the insert point ahead of the instruction so any future uses of + // this builder will have the def ready. + setInsertPt(*CurMBB, std::next(MII)); + } else if (!dominates(MI, CurrPos)) { CurMBB->splice(CurrPos, CurMBB, MI); + } return MachineInstrBuilder(getMF(), MI); } return MachineInstrBuilder(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index 0c11b103a4ff5..f4192bfa7f8cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -440,9 +440,9 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %1(<4 x s16>), %1(<4 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index 131a81f8fe58f..60152da35f6e0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -6097,41 +6097,41 @@ body: | ; CI-LABEL: name: test_load_constant_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: 
(load 8, addrspace 4) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 @@ -6210,9 +6210,9 @@ body: | ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6243,9 +6243,9 @@ body: | ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6270,9 +6270,9 @@ body: | ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x 
s16>), %2(<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6303,9 +6303,9 @@ body: | ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6330,9 +6330,9 @@ body: | ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6405,9 +6405,9 @@ body: | ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6461,9 +6461,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6514,9 +6514,9 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6576,9 +6576,9 @@ body: | ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x 
s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6629,9 +6629,9 @@ body: | ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index 6aa49dae11b87..fcf4f91fd4f73 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -5929,41 +5929,41 @@ body: | ; CI-LABEL: name: test_load_flat_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; 
VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x 
s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 @@ -6042,9 +6042,9 @@ body: | ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6075,9 +6075,9 @@ body: | ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6102,9 +6102,9 @@ body: | ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], 
[[UV]](<3 x s16>), 0
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6135,9 +6135,9 @@ body: |
     ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4)
     ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6162,9 +6162,9 @@ body: |
     ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4)
     ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
     ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6237,9 +6237,9 @@ body: |
     ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
     ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6293,9 +6293,9 @@ body: |
     ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6346,9 +6346,9 @@ body: |
     ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6408,9 +6408,9 @@ body: |
     ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
     ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6461,9 +6461,9 @@ body: |
     ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
     ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 63e1285c0fa4f..23a148113d9e5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -6181,49 +6181,49 @@ body: |
     ; SI-LABEL: name: test_load_global_v3s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
@@ -6314,9 +6314,9 @@ body: |
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 + 4, addrspace 1)
     ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6353,9 +6353,9 @@ body: |
     ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 + 4, addrspace 1)
     ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6386,9 +6386,9 @@ body: |
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 + 4, addrspace 1)
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6419,9 +6419,9 @@ body: |
     ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 + 4, addrspace 1)
     ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
     ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6494,9 +6494,9 @@ body: |
     ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
     ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6562,9 +6562,9 @@ body: |
     ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
     ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6618,9 +6618,9 @@ body: |
     ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6677,9 +6677,9 @@ body: |
     ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
     ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -7425,9 +7425,9 @@ body: |
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 + 8, align 8, addrspace 1)
     ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
     ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<4 x s16>), 0
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
@@ -7556,9 +7556,9 @@ body: |
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 + 8, align 4, addrspace 1)
     ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
     ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<4 x s16>), 0
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
@@ -7713,9 +7713,9 @@ body: |
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 + 8, addrspace 1)
     ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
     ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
@@ -7803,9 +7803,9 @@ body: |
     ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 + 8, addrspace 1)
     ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
     ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
@@ -7881,9 +7881,9 @@ body: |
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 + 8, addrspace 1)
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
     ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
@@ -7961,9 +7961,9 @@ body: |
     ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 + 8, addrspace 1)
     ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
     ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
@@ -8091,9 +8091,9 @@ body: |
     ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
     ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
     ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
@@ -8228,9 +8228,9 @@ body: |
     ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
     ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
     ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
@@ -8343,9 +8343,9 @@ body: |
     ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
     ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
     ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
@@ -8465,9 +8465,9 @@ body: |
     ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
     ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>), %10(<6 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
     ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
     ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
@@ -9722,9 +9722,9 @@ body: |
     ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 + 12, addrspace 1)
     ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %63(<4 x s16>), %63(<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
     ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -9734,9 +9734,9 @@ body: |
     ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
     ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>)
-    ; SI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; SI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
+    ; SI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; SI: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
     ; SI: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; SI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
@@ -9846,9 +9846,9 @@ body: |
     ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 + 12, addrspace 1)
     ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %63(<4 x s16>), %63(<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
     ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -9858,9 +9858,9 @@ body: |
     ; CI-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
     ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>)
-    ; CI-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; CI-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
+    ; CI-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
     ; CI-MESA: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; CI-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
@@ -9957,9 +9957,9 @@ body: |
     ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 + 12, addrspace 1)
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %63(<4 x s16>), %63(<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
     ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -9969,9 +9969,9 @@ body: |
     ; VI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
     ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>)
-    ; VI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; VI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
+    ; VI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; VI: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
     ; VI: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; VI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
@@ -10067,9 +10067,9 @@ body: |
     ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 + 12, addrspace 1)
     ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %63(<4 x s16>), %63(<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>), 0
     ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -10079,9 +10079,9 @@ body: |
     ; GFX9-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
     ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>)
-    ; GFX9-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
+    ; GFX9-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
     ; GFX9-MESA: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
@@ -10245,9 +10245,9 @@ body: |
     ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
     ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %63(<4 x s16>), %63(<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
     ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -10257,9 +10257,9 @@ body: |
     ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
     ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>)
-    ; SI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; SI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
+    ; SI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; SI: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
     ; SI: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; SI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
@@ -10434,9 +10434,9 @@ body: |
     ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
     ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %63(<4 x s16>), %63(<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
     ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -10446,9 +10446,9 @@ body: |
     ; CI-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
     ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>)
-    ; CI-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; CI-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
+    ; CI-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
     ; CI-MESA: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; CI-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
@@ -10596,9 +10596,9 @@ body: |
     ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16)
     ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]]
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %63(<4 x s16>), %63(<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
     ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -10608,9 +10608,9 @@ body: |
     ; VI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
     ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>)
-    ; VI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; VI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
+    ; VI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; VI: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
     ; VI: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; VI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
@@ -10764,9 +10764,9 @@ body: |
     ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16)
     ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %63(<4 x s16>), %63(<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
     ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>), 0
     ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -10776,9 +10776,9 @@ body: |
     ; GFX9-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
     ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>), %12(<8 x s16>)
-    ; GFX9-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
+    ; GFX9-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
     ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
     ; GFX9-MESA: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
     ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 3f0f84a0e9b66..c66c5dc8c4632 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -6686,49 +6686,49 @@ body: |
     ; SI-LABEL: name: test_load_local_v3s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
@@ -6764,9 +6764,9 @@ body: |
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 + 4, addrspace 3)
     ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6797,9 +6797,9 @@ body: |
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 + 4, addrspace 3)
     ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6830,9 +6830,9 @@ body: |
     ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 + 4, addrspace 3)
     ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
     ; CI-DS128: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
@@ -6863,9 +6863,9 @@ body: |
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 + 4, addrspace 3)
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI:
[[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6890,9 +6890,9 @@ body: | ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 + 4, addrspace 3) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -6971,9 +6971,9 @@ body: | ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -7033,9 +7033,9 @@ body: | ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT 
[[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -7095,9 +7095,9 @@ body: | ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI-DS128: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -7151,9 +7151,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -7204,9 +7204,9 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], 
[[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index d85d450dba84e..38494703e2db0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -5203,9 +5203,9 @@ body: | ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 + 4, align 4, addrspace 5) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0 ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5224,9 +5224,9 @@ body: | ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 + 4, align 4, addrspace 5) ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0 ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5245,9 +5245,9 @@ body: | ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 + 4, align 4, addrspace 5) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) 
+ ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0 ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5266,9 +5266,9 @@ body: | ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 + 4, align 4, addrspace 5) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0 ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5312,9 +5312,9 @@ body: | ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 + 4, addrspace 5) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5345,9 +5345,9 @@ body: | ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 + 4, addrspace 5) ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; 
CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5378,9 +5378,9 @@ body: | ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 + 4, addrspace 5) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5405,9 +5405,9 @@ body: | ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 + 4, addrspace 5) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5480,9 +5480,9 @@ body: | ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x 
s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5542,9 +5542,9 @@ body: | ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5598,9 +5598,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) @@ -5651,9 +5651,9 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) 
= G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) diff --git a/llvm/unittests/CodeGen/GlobalISel/CSETest.cpp b/llvm/unittests/CodeGen/GlobalISel/CSETest.cpp index 0ef7b3d207fef..1bca56ff7a727 100644 --- a/llvm/unittests/CodeGen/GlobalISel/CSETest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/CSETest.cpp @@ -8,6 +8,7 @@ #include "GISelMITest.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" +#include "gtest/gtest.h" namespace { @@ -136,4 +137,30 @@ TEST_F(AArch64GISelMITest, TestCSEConstantConfig) { auto Undef1 = CSEB.buildUndef(s16); EXPECT_EQ(&*Undef0, &*Undef1); } + +TEST_F(AArch64GISelMITest, TestCSEImmediateNextCSE) { + setUp(); + if (!TM) + return; + + LLT s32{LLT::scalar(32)}; + // We want to check that when the CSE hit is on the next instruction, i.e. at + // the current insert pt, that the insertion point is moved ahead of the + // instruction. + + GISelCSEInfo CSEInfo; + CSEInfo.setCSEConfig(std::make_unique<CSEConfigConstantOnly>()); + CSEInfo.analyze(*MF); + B.setCSEInfo(&CSEInfo); + CSEMIRBuilder CSEB(B.getState()); + CSEB.buildConstant(s32, 0); + auto MIBCst2 = CSEB.buildConstant(s32, 2); + + // Move the insert point before the second constant. + CSEB.setInsertPt(CSEB.getMBB(), --CSEB.getInsertPt()); + auto MIBCst3 = CSEB.buildConstant(s32, 2); + EXPECT_TRUE(&*MIBCst2 == &*MIBCst3); + EXPECT_TRUE(CSEB.getInsertPt() == CSEB.getMBB().end()); +} + } // namespace From 6e25586990b93e2c9eaaa4f473b6720ccd646c46 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Mon, 5 Oct 2020 11:02:13 -0700 Subject: [PATCH 050/321] [llvm-objcopy][MachO] Add missing std::move. This change fixes the build issue introduced by 32c8435ef7 (detected by the buildbot fuchsia-x86_64-linux). Test plan: make check-all --- llvm/tools/llvm-objcopy/llvm-objcopy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp index 8cd58572f5a19..e1f1ed8f3cc1b 100644 --- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp @@ -187,7 +187,7 @@ createNewArchiveMembers(CopyConfig &Config, const Archive &Ar) { } if (Err) return createFileError(Config.InputFilename, std::move(Err)); - return NewArchiveMembers; + return std::move(NewArchiveMembers); } } // end namespace objcopy From fa0293081d62295140f8518cc7127473d6856cda Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 26 Aug 2020 10:51:56 +0100 Subject: [PATCH 051/321] [SVE][CodeGen] Fix TypeSize/ElementCount related warnings in sve-split-store.ll I have fixed up a number of warnings resulting from TypeSize -> uint64_t casts and calling getVectorNumElements() on scalable vector types. I think most of the changes are fairly trivial, except for those in DAGTypeLegalizer::SplitVecOp_MSTORE, where I've tried to ensure we create the MachineMemOperands in a sensible way for scalable vectors. I have added a CHECK line to the following test: CodeGen/AArch64/sve-split-store.ll that ensures no new warnings are added.
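As an illustrative sketch only (it reuses the APIs touched by this patch and is not itself part of the change), the general shape of these fixes is to keep widths as TypeSize values, compare them directly, and call getFixedSize() only once the type is known not to be scalable:

  // Sketch, not compilable on its own: assumes the surrounding
  // SelectionDAG legalization context shown in the diff below.
  TypeSize StWidth = StVT.getSizeInBits();   // was: unsigned StWidth
  TypeSize StSize = StVT.getStoreSizeInBits();
  if (StWidth != StSize) {
    // Comparing two TypeSize values directly is fine for scalable types.
  } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedSize())) {
    // getFixedSize() is only reached for scalar types, which always have
    // a fixed, known size.
  }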
Differential Revision: https://reviews.llvm.org/D86928 --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 17 ++++++++-------- .../SelectionDAG/LegalizeVectorTypes.cpp | 20 ++++++++++++++----- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 +++++++-- llvm/test/CodeGen/AArch64/sve-split-store.ll | 8 ++++++-- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 83ade2d2fdca3..ea44fb2e722a7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -542,28 +542,29 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n"); SDValue Value = ST->getValue(); EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); + TypeSize StWidth = StVT.getSizeInBits(); + TypeSize StSize = StVT.getStoreSizeInBits(); auto &DL = DAG.getDataLayout(); - if (StWidth != StVT.getStoreSizeInBits()) { + if (StWidth != StSize) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), - StVT.getStoreSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedSize()); Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); - } else if (StWidth & (StWidth - 1)) { + } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedSize())) { // If not storing a power-of-2 number of bits, expand as two stores. 
assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned LogStWidth = Log2_32(StWidth); + unsigned StWidthBits = StWidth.getFixedSize(); + unsigned LogStWidth = Log2_32(StWidthBits); assert(LogStWidth < 32); unsigned RoundWidth = 1 << LogStWidth; - assert(RoundWidth < StWidth); - unsigned ExtraWidth = StWidth - RoundWidth; + assert(RoundWidth < StWidthBits); + unsigned ExtraWidth = StWidthBits - RoundWidth; assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Store size not an integral number of bytes!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8e14a73e7ea1e..dbb01741853a4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2425,9 +2425,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty); SDValue Lo, Hi, Res; + unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, N->getAAInfo(), N->getRanges()); + N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment, + N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -2441,11 +2442,20 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, N->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); + MachinePointerInfo MPI; + if (LoMemVT.isScalableVector()) { + Alignment = commonAlignment( + Alignment, LoMemVT.getSizeInBits().getKnownMinSize() / 8); + MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); + } else + MPI = N->getPointerInfo().getWithOffset( + LoMemVT.getStoreSize().getFixedSize()); + + unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize()); MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, - HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); + MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(), + N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 62d01fbf96cdf..b51e5b9981d1d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2214,6 +2214,10 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, /// SimplifyMultipleUseDemandedBits and not generate any new nodes. SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { EVT VT = V.getValueType(); + + if (VT.isScalableVector()) + return SDValue(); + APInt DemandedElts = VT.isVector() ? 
APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); @@ -7135,7 +7139,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo); + PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), + Alignment, AAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -7156,7 +7161,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, assert(VT.isVector() == SVT.isVector() && "Cannot use trunc store to convert to or from a vector!"); assert((!VT.isVector() || - VT.getVectorNumElements() == SVT.getVectorNumElements()) && + VT.getVectorElementCount() == SVT.getVectorElementCount()) && "Cannot use trunc store to change the number of vector elements!"); SDVTList VTs = getVTList(MVT::Other); diff --git a/llvm/test/CodeGen/AArch64/sve-split-store.ll b/llvm/test/CodeGen/AArch64/sve-split-store.ll index a3a9b8b53ec70..fad5d78aeef61 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-store.ll @@ -1,5 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning ; UNPREDICATED @@ -113,10 +117,10 @@ define void @masked_store_split_8i64( %data, Date: Thu, 24 Sep 2020 21:18:24 +0000 Subject: [PATCH 052/321] docs: add documentation describing API Notes API Notes are a feature which allows annotation of headers by an auxiliary file that contains metadata for declarations pertaining to the associated module. This enables adding attributes to declarations without requiring modification of the headers, enabling finer grained control for library headers for consumers without having to modify external headers. Differential Revision: https://reviews.llvm.org/D88446 Reviewed By: Richard Smith, Marcel Hlopko --- clang/docs/APINotes.rst | 363 ++++++++++++++++++ .../Headers/SomeKit.apinotes | 98 +++++ .../SomeKit.framework/Headers/SomeKit.h | 60 +++ .../Headers/SomeKitExplicitNullability.h | 4 + .../Headers/SomeKit_private.apinotes | 15 + 5 files changed, 540 insertions(+) create mode 100644 clang/docs/APINotes.rst create mode 100644 clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit.apinotes create mode 100644 clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit.h create mode 100644 clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitExplicitNullability.h create mode 100644 clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit_private.apinotes diff --git a/clang/docs/APINotes.rst b/clang/docs/APINotes.rst new file mode 100644 index 0000000000000..4ac4c01cdefba --- /dev/null +++ b/clang/docs/APINotes.rst @@ -0,0 +1,363 @@ +================================================ +API Notes: Annotations Without Modifying Headers +================================================ + +**The Problem:** You have headers you want to use, but you also want to add +extra information to the API. 
You don't want to put that information in the +headers themselves --- perhaps because you want to keep them clean for other +clients, or perhaps because they're from some open source project and you don't +want to modify them at all. + +**Incomplete solution:** Redeclare all the interesting parts of the API in your +own header and add the attributes you want. Unfortunately, this: + +* doesn't work with attributes that must be present on a definition +* doesn't allow changing the definition in other ways +* requires your header to be included in any client code to take effect + +**Better solution:** Provide a "sidecar" file with the information you want to +add, and have that automatically get picked up by the module-building logic in +the compiler. + +That's API notes. + +API notes use a YAML-based file format. YAML is a format best explained by +example, so here is a `small example`__ from the compiler test suite of API +notes for a hypothetical "SomeKit" framework. + +__ test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit.apinotes + + +Usage +===== + +API notes files are found relative to the module map that defines a module, +under the name "SomeKit.apinotes" for a module named "SomeKit". Additionally, a +file named "SomeKit_private.apinotes" will also be picked up to go with a +private module map. For bare modules these two files will be in the same +directory as the corresponding module map; for framework modules, they should +be placed in the Headers and PrivateHeaders directories, respectively. The +module map for a private top-level framework module should be placed in the +PrivateHeaders directory as well, though it does not need an additional +"_private" suffix on its name. + +Clang will search for API notes files next to module maps only when passed the +``-fapi-notes-modules`` option. + + +Limitations +=========== + +- Since they're identified by module name, API notes cannot be used to modify + arbitrary textual headers. + + +"Versioned" API Notes +===================== + +Many API notes affect how a C API is imported into Swift. In order to change +that behavior while still remaining backwards-compatible, API notes can be +selectively applied based on the Swift compatibility version provided to the +compiler (e.g. ``-fapi-notes-swift-version=5``). The rule is that an +explicitly-versioned API note applies to that version *and all earlier +versions,* and any applicable explicitly-versioned API note takes precedence +over an unversioned API note. + + +Reference +========= + +An API notes file contains a YAML dictionary with the following top-level +entries: + +:Name: + + The name of the module (the framework name, for frameworks). Note that this + is always the name of a top-level module, even within a private API notes + file. + + :: + + Name: MyFramework + +:Classes, Protocols, Tags, Typedefs, Globals, Enumerators, Functions: + + Arrays of top-level declarations. Each entry in the array must have a + 'Name' key with its Objective-C name. "Tags" refers to structs, enums, and + unions; "Enumerators" refers to enum cases. + + :: + + Classes: + - Name: MyController + … + - Name: MyView + … + +:SwiftVersions: + + Contains explicit information for backwards compatibility. Each entry in + the array contains a 'Version' key, which should be set to '4' for + annotations that only apply to Swift 4 mode and earlier. 
The other entries + in this dictionary are the same declaration entries as at the top level: + Classes, Protocols, Tags, Typedefs, Globals, Enumerators, and Functions. + + :: + + SwiftVersions: + - Version: 4 + Classes: … + Protocols: … + +Each entry under 'Classes' and 'Protocols' can contain "Methods" and +"Properties" arrays, in addition to the attributes described below: + +:Methods: + + Identified by 'Selector' and 'MethodKind'; the MethodKind is either + "Instance" or "Class". + + :: + + Classes: + - Name: UIViewController + Methods: + - Selector: "presentViewController:animated:" + MethodKind: Instance + … + +:Properties: + + Identified by 'Name' and 'PropertyKind'; the PropertyKind is also either + "Instance" or "Class". + + :: + + Classes: + - Name: UIView + Properties: + - Name: subviews + PropertyKind: Instance + … + +Each declaration supports the following annotations (if relevant to that +declaration kind), all of which are optional: + +:SwiftName: + + Equivalent to ``NS_SWIFT_NAME``. For a method, must include the full Swift name + with all arguments. Use "_" to omit an argument label. + + :: + + - Selector: "presentViewController:animated:" + MethodKind: Instance + SwiftName: "present(_:animated:)" + + - Class: NSBundle + SwiftName: Bundle + +:Availability, AvailabilityMsg: + + A value of "nonswift" is equivalent to ``NS_SWIFT_UNAVAILABLE``. A value of + "available" can be used in the "SwiftVersions" section to undo the effect of + "nonswift". + + :: + + - Selector: "dealloc" + MethodKind: Instance + Availability: nonswift + AvailabilityMsg: "prefer 'deinit'" + +:SwiftPrivate: + + Equivalent to NS_REFINED_FOR_SWIFT. + + :: + + - Name: CGColorEqualToColor + SwiftPrivate: true + +:Nullability: + + Used for properties and globals. There are four options, identified by their + initials: + + - ``Nonnull`` or ``N`` (corresponding to ``_Nonnull``) + - ``Optional`` or ``O`` (corresponding to ``_Nullable``) + - ``Unspecified`` or ``U`` (corresponding to ``_Null_unspecified``) + - ``Scalar`` or ``S`` (deprecated) + + Note that 'Nullability' is overridden by 'Type', even in a "SwiftVersions" + section. + + .. note:: + + 'Nullability' can also be used to describe the argument types of methods + and functions, but this usage is deprecated in favor of 'Parameters' (see + below). + + :: + + - Name: dataSource + Nullability: O + +:NullabilityOfRet: + + Used for methods and functions. Describes the nullability of the return type. + + Note that 'NullabilityOfRet' is overridden by 'ResultType', even in a + "SwiftVersions" section. + + .. warning:: + + Due to a compiler bug, 'NullabilityOfRet' may change nullability of the + parameters as well (rdar://30544062). Avoid using it and instead use + 'ResultType' and specify the return type along with a nullability + annotation (see documentation for 'ResultType'). + + :: + + - Selector: superclass + MethodKind: Class + NullabilityOfRet: O + +:Type: + + Used for properties and globals. This completely overrides the type of the + declaration; it should ideally only be used for Swift backwards + compatibility, when existing type information has been made more precise in a + header. Prefer 'Nullability' and other annotations when possible. + + We parse the specified type as if it appeared at the location of the + declaration whose type is being modified. Macros are not available and + nullability must be applied explicitly (even in an ``NS_ASSUME_NONNULL_BEGIN`` + section). 
+ + :: + + - Name: delegate + PropertyKind: Instance + Type: "id" + +:ResultType: + + Used for methods and functions. This completely overrides the return type; it + should ideally only be used for Swift backwards compatibility, when existing + type information has been made more precise in a header. + + We parse the specified type as if it appeared at the location of the + declaration whose type is being modified. Macros are not available and + nullability must be applied explicitly (even in an ``NS_ASSUME_NONNULL_BEGIN`` + section). + + :: + + - Selector: "subviews" + MethodKind: Instance + ResultType: "NSArray * _Nonnull" + +:SwiftImportAsAccessors: + + Used for properties. If true, the property will be exposed in Swift as its + accessor methods, rather than as a computed property using ``var``. + + :: + + - Name: currentContext + PropertyKind: Class + SwiftImportAsAccessors: true + +:NSErrorDomain: + + Used for ``NSError`` code enums. The value is the name of the associated + domain ``NSString`` constant; an empty string (``""``) means the enum is a + normal enum rather than an error code. + + :: + + - Name: MKErrorCode + NSErrorDomain: MKErrorDomain + +:SwiftWrapper: + + Controls ``NS_STRING_ENUM`` and ``NS_EXTENSIBLE_STRING_ENUM``. There are three + options: + + - "struct" (extensible) + - "enum" + - "none" + + Note that even an "enum" wrapper is still presented as a struct in Swift; + it's just a "more enum-like" struct. + + :: + + - Name: AVMediaType + SwiftWrapper: none + +:EnumKind: + + Has the same effect as ``NS_ENUM`` and ``NS_OPTIONS``. There are four options: + + - "NSEnum" / "CFEnum" + - "NSClosedEnum" / "CFClosedEnum" + - "NSOptions" / "CFOptions" + - "none" + + :: + + - Name: GKPhotoSize + EnumKind: none + +:Parameters: + + Used for methods and functions. Parameters are identified by a 0-based + 'Position' and support the 'Nullability', 'NoEscape', and 'Type' keys. + + .. note:: + + Using 'Parameters' within a parameter entry to describe the parameters of a + block is not implemented. Use 'Type' on the entire parameter instead. + + :: + + - Selector: "isEqual:" + MethodKind: Instance + Parameters: + - Position: 0 + Nullability: O + +:NoEscape: + + Used only for block parameters. Equivalent to ``NS_NOESCAPE``. + + :: + + - Name: dispatch_sync + Parameters: + - Position: 0 + NoEscape: true + +:SwiftBridge: + + Used for Objective-C class types bridged to Swift value types. An empty + string ("") means a type is not bridged. Not supported outside of Apple + frameworks (the Swift side of it requires conforming to implementation-detail + protocols that are subject to change). + + :: + + - Name: NSIndexSet + SwiftBridge: IndexSet + +:DesignatedInit: + + Used for init methods. Equivalent to ``NS_DESIGNATED_INITIALIZER``. 
+ + :: + + - Selector: "initWithFrame:" + MethodKind: Instance + DesignatedInit: true diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit.apinotes new file mode 100644 index 0000000000000..ff88fdbaeac83 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit.apinotes @@ -0,0 +1,98 @@ +Name: SomeKit +Classes: + - Name: A + Methods: + - Selector: "transform:" + MethodKind: Instance + Availability: none + AvailabilityMsg: "anything but this" + - Selector: "transform:integer:" + MethodKind: Instance + NullabilityOfRet: N + Nullability: [ N, S ] + - Selector: "implicitGetOnlyInstance" + MethodKind: Instance + Availability: none + AvailabilityMsg: "getter gone" + - Selector: "implicitGetOnlyClass" + MethodKind: Class + Availability: none + AvailabilityMsg: "getter gone" + - Selector: "implicitGetSetInstance" + MethodKind: Instance + Availability: none + AvailabilityMsg: "getter gone" + - Selector: "implicitGetSetClass" + MethodKind: Class + Availability: none + AvailabilityMsg: "getter gone" + - Selector: "setImplicitGetSetInstance:" + MethodKind: Instance + Availability: none + AvailabilityMsg: "setter gone" + - Selector: "setImplicitGetSetClass:" + MethodKind: Class + Availability: none + AvailabilityMsg: "setter gone" + Properties: + - Name: intValue + PropertyKind: Instance + Availability: none + AvailabilityMsg: "wouldn't work anyway" + - Name: nonnullAInstance + PropertyKind: Instance + Nullability: N + - Name: nonnullAClass + PropertyKind: Class + Nullability: N + - Name: nonnullABoth + Nullability: N + - Name: B + Availability: none + AvailabilityMsg: "just don't" + - Name: C + Methods: + - Selector: "initWithA:" + MethodKind: Instance + DesignatedInit: true + - Name: OverriddenTypes + Methods: + - Selector: "methodToMangle:second:" + MethodKind: Instance + ResultType: 'char *' + Parameters: + - Position: 0 + Type: 'SOMEKIT_DOUBLE *' + - Position: 1 + Type: 'float *' + Properties: + - Name: intPropertyToMangle + PropertyKind: Instance + Type: 'double *' +Functions: + - Name: global_int_fun + ResultType: 'char *' + Parameters: + - Position: 0 + Type: 'double *' + - Position: 1 + Type: 'float *' +Globals: + - Name: global_int_ptr + Type: 'double (*)(int, int)' +SwiftVersions: + - Version: 3.0 + Classes: + - Name: A + Methods: + - Selector: "transform:integer:" + MethodKind: Instance + NullabilityOfRet: O + Nullability: [ O, S ] + Properties: + - Name: explicitNonnullInstance + PropertyKind: Instance + Nullability: O + - Name: explicitNullableInstance + PropertyKind: Instance + Nullability: N diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit.h b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit.h new file mode 100644 index 0000000000000..1a192f5432fd1 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit.h @@ -0,0 +1,60 @@ +#ifndef SOMEKIT_H +#define SOMEKIT_H + +__attribute__((objc_root_class)) +@interface A +-(A*)transform:(A*)input; +-(A*)transform:(A*)input integer:(int)integer; + +@property (nonatomic, readonly, retain) A* someA; +@property (nonatomic, retain) A* someOtherA; + +@property (nonatomic) int intValue; +@end + +@interface B : A +@end + +@interface C : A +- (instancetype)init; +- (instancetype)initWithA:(A*)a; +@end + +@interface ProcessInfo : A ++(instancetype)processInfo; +@end + +@interface A(NonNullProperties) +@property 
(nonatomic, readwrite, retain) A *nonnullAInstance; +@property (class, nonatomic, readwrite, retain) A *nonnullAInstance; + +@property (nonatomic, readwrite, retain) A *nonnullAClass; +@property (class, nonatomic, readwrite, retain) A *nonnullAClass; + +@property (nonatomic, readwrite, retain) A *nonnullABoth; +@property (class, nonatomic, readwrite, retain) A *nonnullABoth; +@end + +#import <SomeKit/SomeKitExplicitNullability.h> + +extern int *global_int_ptr; + +int *global_int_fun(int *ptr, int *ptr2); + +#define SOMEKIT_DOUBLE double + +__attribute__((objc_root_class)) +@interface OverriddenTypes +-(int *)methodToMangle:(int *)ptr1 second:(int *)ptr2; +@property int *intPropertyToMangle; +@end + +@interface A(ImplicitGetterSetters) +@property (nonatomic, readonly, retain) A *implicitGetOnlyInstance; +@property (class, nonatomic, readonly, retain) A *implicitGetOnlyClass; + +@property (nonatomic, readwrite, retain) A *implicitGetSetInstance; +@property (class, nonatomic, readwrite, retain) A *implicitGetSetClass; +@end + +#endif diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitExplicitNullability.h b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitExplicitNullability.h new file mode 100644 index 0000000000000..914ab00357fce --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitExplicitNullability.h @@ -0,0 +1,4 @@ +@interface A(ExplicitNullabilityProperties) +@property (nonatomic, readwrite, retain, nonnull) A *explicitNonnullInstance; +@property (nonatomic, readwrite, retain, nullable) A *explicitNullableInstance; +@end diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit_private.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit_private.apinotes new file mode 100644 index 0000000000000..aee8c3684dc4c --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKit_private.apinotes @@ -0,0 +1,15 @@ +Name: SomeKit +Classes: + - Name: A + Methods: + - Selector: "privateTransform:input:" + MethodKind: Instance + NullabilityOfRet: N + Nullability: [ N, S ] + Properties: + - Name: internalProperty + Nullability: N +Protocols: + - Name: InternalProtocol + Availability: none + AvailabilityMsg: "not for you" From 82ebbcfb059ba755ce4ae7711a27205fbe443990 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 1 Oct 2020 16:53:23 -0700 Subject: [PATCH 053/321] [NFC][regalloc] Model weight normalization as a virtual Continuing from D88499, we can now model the normalization function as a virtual member of VirtRegAuxInfo. Note that the default (normalizeSpillWeight) is also used stand-alone in RAGreedy. Differential Revision: https://reviews.llvm.org/D88713 --- llvm/include/llvm/CodeGen/CalcSpillWeights.h | 39 +++++++++++--------- llvm/lib/CodeGen/RegAllocPBQP.cpp | 24 +++++++----- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h index d2e79170384d0..10e9ac866bdb1 100644 --- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h +++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h @@ -44,27 +44,23 @@ class VirtRegMap; /// Calculate auxiliary information for a virtual register such as its /// spill weight and allocation hint.
class VirtRegAuxInfo { - public: - using NormalizingFn = float (*)(float, unsigned, unsigned); - - private: MachineFunction &MF; LiveIntervals &LIS; - VirtRegMap *VRM; + VirtRegMap *const VRM; const MachineLoopInfo &Loops; const MachineBlockFrequencyInfo &MBFI; DenseMap Hint; - NormalizingFn normalize; public: - VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis, - VirtRegMap *vrm, const MachineLoopInfo &loops, - const MachineBlockFrequencyInfo &mbfi, - NormalizingFn norm = normalizeSpillWeight) - : MF(mf), LIS(lis), VRM(vrm), Loops(loops), MBFI(mbfi), normalize(norm) {} + VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap *VRM, + const MachineLoopInfo &Loops, + const MachineBlockFrequencyInfo &MBFI) + : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {} + + virtual ~VirtRegAuxInfo() = default; /// (re)compute li's spill weight and allocation hint. - void calculateSpillWeightAndHint(LiveInterval &li); + void calculateSpillWeightAndHint(LiveInterval &LI); /// Compute future expected spill weight of a split artifact of li /// that will span between start and end slot indexes. @@ -75,8 +71,13 @@ class VirtRegMap; /// after end will not affect the weight. /// \return The expected spill weight of the split artifact. Returns /// negative weight for unspillable li. - float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end); + float futureWeight(LiveInterval &LI, SlotIndex Start, SlotIndex End); + + /// Compute spill weights and allocation hints for all virtual register + /// live intervals. + void calculateSpillWeightsAndHints(); + protected: /// Helper function for weight calculations. /// (Re)compute li's spill weight and allocation hint, or, for non null /// start and end - compute future expected spill weight of a split @@ -89,12 +90,14 @@ class VirtRegMap; /// after end will not affect the weight. Relevant for /// weight calculation of future split artifact. /// \return The spill weight. Returns negative weight for unspillable li. - float weightCalcHelper(LiveInterval &li, SlotIndex *start = nullptr, - SlotIndex *end = nullptr); + float weightCalcHelper(LiveInterval &LI, SlotIndex *Start = nullptr, + SlotIndex *End = nullptr); - /// Compute spill weights and allocation hints for all virtual register - /// live intervals. - void calculateSpillWeightsAndHints(); + /// Weight normalization function. + virtual float normalize(float UseDefFreq, unsigned Size, + unsigned NumInstr) { + return normalizeSpillWeight(UseDefFreq, Size, NumInstr); + } }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index eb5dec51c8d0d..2c6e01376b0c2 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -518,6 +518,20 @@ class Coalescing : public PBQPRAConstraint { } }; +/// PBQP-specific implementation of weight normalization. +class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo { + float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) override { + // All intervals have a spill weight that is mostly proportional to the + // number of uses, with uses in loops having a bigger weight. + return NumInstr * VirtRegAuxInfo::normalize(UseDefFreq, Size, 1); + } + +public: + PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap *VRM, + const MachineLoopInfo &Loops, + const MachineBlockFrequencyInfo &MBFI) + : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {} +}; } // end anonymous namespace // Out-of-line destructor/anchor for PBQPRAConstraint. 
@@ -778,13 +792,6 @@ void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) { DeadRemats.clear(); } -static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size, - unsigned NumInstr) { - // All intervals have a spill weight that is mostly proportional to the number - // of uses, with uses in loops having a bigger weight. - return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1); -} - bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { LiveIntervals &LIS = getAnalysis<LiveIntervals>(); MachineBlockFrequencyInfo &MBFI = @@ -792,8 +799,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { VirtRegMap &VRM = getAnalysis<VirtRegMap>(); - VirtRegAuxInfo VRAI(MF, LIS, &VRM, getAnalysis<MachineLoopInfo>(), MBFI, - normalizePBQPSpillWeight); + PBQPVirtRegAuxInfo VRAI(MF, LIS, &VRM, getAnalysis<MachineLoopInfo>(), MBFI); VRAI.calculateSpillWeightsAndHints(); std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM)); From 95262ee2be75daffd05e9a8a985ca2c8e34c9af7 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 5 Oct 2020 13:17:52 +0200 Subject: [PATCH 054/321] [clangd] Describe non-handling of most IWYU pragmas. NFC Differential Revision: https://reviews.llvm.org/D88822 --- .../clangd/index/CanonicalIncludes.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.h b/clang-tools-extra/clangd/index/CanonicalIncludes.h index aabfabc75368b..da7dc46db778e 100644 --- a/clang-tools-extra/clangd/index/CanonicalIncludes.h +++ b/clang-tools-extra/clangd/index/CanonicalIncludes.h @@ -71,6 +71,21 @@ class CanonicalIncludes { /// /// Currently it only supports IWYU private pragma: /// https://github.com/include-what-you-use/include-what-you-use/blob/master/docs/IWYUPragmas.md#iwyu-pragma-private +/// +/// We ignore other pragmas: +/// - keep: this is common but irrelevant: we do not currently remove includes +/// - export: this is common and potentially interesting, there are three cases: +/// * Points to a public header (common): we can suppress include2 if you +/// already have include1. Only marginally useful. +/// * Points to a private header annotated with `private` (somewhat common): +/// Not incrementally useful as we support private. +/// * Points to a private header without pragmas (rare). This is a reversed +/// private pragma, and is valuable but too rare to be worthwhile. +/// - no_include: this is about as common as private, but only affects the +/// current file, so the value is smaller. We could add support. +/// - friend: this is less common than private, has implementation difficulties, +/// and affects behavior in a limited scope.
+/// - associated: extremely rare std::unique_ptr<CommentHandler> collectIWYUHeaderMaps(CanonicalIncludes *Includes); From 0f8e4a5ed0e556af82592f7cae4a6082ab238da3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 5 Oct 2020 21:09:53 +0200 Subject: [PATCH 055/321] [InstCombine] Add test for PR47730 --- llvm/test/Transforms/InstCombine/select.ll | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index df506477eed1c..326050c5a200d 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2725,6 +2725,19 @@ define i32 @select_replacement_loop2(i32 %arg, i32 %arg2) { ret i32 %sel } +define i8* @select_replacement_gep_inbounds(i8* %base, i64 %offset) { +; CHECK-LABEL: @select_replacement_gep_inbounds( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[OFFSET:%.*]], 0 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 [[OFFSET]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8* [[BASE]], i8* [[GEP]] +; CHECK-NEXT: ret i8* [[SEL]] +; + %cmp = icmp eq i64 %offset, 0 + %gep = getelementptr inbounds i8, i8* %base, i64 %offset + %sel = select i1 %cmp, i8* %base, i8* %gep + ret i8* %sel +} + declare void @use(i1) declare void @use_i8(i8) declare i32 @llvm.cttz.i32(i32, i1 immarg) From 3641d375f6747237b1b55a25a55e9028d8a67a02 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 5 Oct 2020 21:13:02 +0200 Subject: [PATCH 056/321] [InstCombine] Handle GEP inbounds in select op replacement (PR47730) When retrying the "simplify with operand replaced" select optimization without poison flags, also handle inbounds on GEPs. Of course, this particular example would also be safe to transform while keeping inbounds, but the underlying machinery does not know this (yet). --- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 8 +++++++- llvm/test/Transforms/InstCombine/select.ll | 7 +++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 087586ede8088..2501c564c3ffa 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1200,7 +1200,7 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, // InstSimplify already performed this fold if it was possible subject to // current poison-generating flags. Try the transform again with // poison-generating flags temporarily dropped. - bool WasNUW = false, WasNSW = false, WasExact = false; + bool WasNUW = false, WasNSW = false, WasExact = false, WasInBounds = false; if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(FalseVal)) { WasNUW = OBO->hasNoUnsignedWrap(); WasNSW = OBO->hasNoSignedWrap(); @@ -1211,6 +1211,10 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, WasExact = PEO->isExact(); FalseInst->setIsExact(false); } + if (auto *GEP = dyn_cast<GetElementPtrInst>(FalseVal)) { + WasInBounds = GEP->isInBounds(); + GEP->setIsInBounds(false); + } // Try each equivalence substitution possibility. // We have an 'EQ' comparison, so the select's false value will propagate.
@@ -1230,6 +1234,8 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, FalseInst->setHasNoSignedWrap(); if (WasExact) FalseInst->setIsExact(); + if (WasInBounds) + cast<GetElementPtrInst>(FalseInst)->setIsInBounds(); return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 326050c5a200d..987f34e52ad20 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2725,12 +2725,11 @@ define i32 @select_replacement_loop2(i32 %arg, i32 %arg2) { ret i32 %sel } +; TODO: Dropping the inbounds flag should not be necessary for this fold. define i8* @select_replacement_gep_inbounds(i8* %base, i64 %offset) { ; CHECK-LABEL: @select_replacement_gep_inbounds( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[OFFSET:%.*]], 0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 [[OFFSET]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8* [[BASE]], i8* [[GEP]] -; CHECK-NEXT: ret i8* [[SEL]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 [[OFFSET:%.*]] +; CHECK-NEXT: ret i8* [[GEP]] ; %cmp = icmp eq i64 %offset, 0 %gep = getelementptr inbounds i8, i8* %base, i64 %offset From e338f8fe69b869afd8adf53919bd578aaf14fb2f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 5 Oct 2020 12:39:19 -0700 Subject: [PATCH 057/321] [gcov] Fix non-determinism (DenseMap iteration order) of checksum computation ... by using MapVector. The issue was caused by 63182c2ac0b643a60d397274e8a31166fc7243fa. Also use stable_partition instead of partition to get stable results across different STL implementations. --- .../Instrumentation/GCOVProfiling.cpp | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index c72c44809acc7..fad1ba093acf6 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -16,6 +16,7 @@ #include "CFGMST.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/Statistic.h" @@ -396,7 +397,7 @@ namespace { } GCOVBlock &getBlock(const BasicBlock *BB) { - return Blocks.find(BB)->second; + return Blocks.find(const_cast<BasicBlock *>(BB))->second; } GCOVBlock &getEntryBlock() { return EntryBlock; } @@ -462,14 +463,8 @@ namespace { write(E.second); } } - std::vector<GCOVBlock *> Sorted; - Sorted.reserve(Blocks.size()); - for (auto &It : Blocks) - Sorted.push_back(&It.second); - llvm::sort(Sorted, [](GCOVBlock *x, GCOVBlock *y) { - return x->Number < y->Number; - }); - for (GCOVBlock &Block : make_pointee_range(Sorted)) { + for (auto &It : Blocks) { + const GCOVBlock &Block = It.second; if (Block.OutEdges.empty()) continue; write(GCOV_TAG_ARCS); @@ -482,8 +477,8 @@ namespace { } // Emit lines for each block.
- for (GCOVBlock &Block : make_pointee_range(Sorted)) - Block.writeOut(); + for (auto &It : Blocks) + It.second.writeOut(); } public: @@ -492,7 +487,7 @@ namespace { uint32_t Ident; uint32_t FuncChecksum; int Version; - DenseMap<BasicBlock *, GCOVBlock> Blocks; + MapVector<BasicBlock *, GCOVBlock> Blocks; GCOVBlock EntryBlock; GCOVBlock ReturnBlock; }; @@ -889,8 +884,8 @@ bool GCOVProfiler::emitProfileNotes( return E->Removed || (!E->InMST && !E->Place); }); const size_t Measured = - llvm::partition(MST.AllEdges, - [](std::unique_ptr<Edge> &E) { return E->Place; }) - + llvm::stable_partition( + MST.AllEdges, [](std::unique_ptr<Edge> &E) { return E->Place; }) - MST.AllEdges.begin(); for (size_t I : llvm::seq<size_t>(0, Measured)) { Edge &E = *MST.AllEdges[I]; From 010d7a388b146cafaf4bc0b28b952d5852d62b6a Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Sat, 3 Oct 2020 22:51:43 -0700 Subject: [PATCH 058/321] [lldb/test] Catch invalid calls to expect() Add preconditions to `TestBase.expect()` that catch semantically invalid calls that happen to succeed anyway. This also fixes the broken callsites caught by these checks. This prevents the following incorrect calls: 1. `self.expect("lldb command", "some substr")` 2. `self.expect("lldb command", "assert message", "some substr")` Differential Revision: https://reviews.llvm.org/D88792 --- lldb/packages/Python/lldbsuite/test/lldbtest.py | 16 ++++++++++++++++ .../assert_messages_test/TestAssertMessages.py | 17 +++++++++++++++++ .../diagnose/bad-reference/TestBadReference.py | 3 ++- .../TestComplicatedExpression.py | 2 +- .../TestDiagnoseDereferenceArgument.py | 2 +- .../TestDiagnoseDereferenceThis.py | 2 +- .../inheritance/TestDiagnoseInheritance.py | 3 ++- .../local-variable/TestLocalVariable.py | 3 ++- .../TestDiagnoseDereferenceVirtualMethodCall.py | 3 ++- lldb/test/API/commands/settings/TestSettings.py | 6 +++--- .../test/API/driver/batch_mode/TestBatchMode.py | 2 +- .../TestBreakpointByLineAndColumn.py | 4 ++-- .../cmtime/TestDataFormatterCMTime.py | 6 +++--- .../cpp/constructors/TestCppConstructors.py | 2 +- .../API/macosx/macCatalyst/TestMacCatalyst.py | 4 ++-- lldb/test/API/types/TestRecursiveTypes.py | 4 ++-- 16 files changed, 58 insertions(+), 21 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index 2ee82295c5538..2309b403cb995 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -2424,6 +2424,22 @@ def expect( set to False, the 'str' is treated as a string to be matched/not-matched against the golden input. """ + # Catch cases where `expect` has been miscalled. Specifically, prevent + # this easy to make mistake: + # self.expect("lldb command", "some substr") + # The `msg` parameter is used only when a failed match occurs. A failed + # match can only occur when one of `patterns`, `startstr`, `endstr`, or + # `substrs` has been given. Thus, if a `msg` is given, it's an error to + # not also provide one of the matcher parameters. + if msg and not (patterns or startstr or endstr or substrs or error): + assert False, "expect() missing a matcher argument" + + # Check `patterns` and `substrs` are not accidentally given as strings.
+ assert not isinstance(patterns, six.string_types), \ + "patterns must be a collection of strings" + assert not isinstance(substrs, six.string_types), \ + "substrs must be a collection of strings" + trace = (True if traceAlways else trace) if exe: diff --git a/lldb/test/API/assert_messages_test/TestAssertMessages.py b/lldb/test/API/assert_messages_test/TestAssertMessages.py index 6619a65ad69ea..f8b6b33f297c3 100644 --- a/lldb/test/API/assert_messages_test/TestAssertMessages.py +++ b/lldb/test/API/assert_messages_test/TestAssertMessages.py @@ -113,3 +113,20 @@ def test_expect(self): Expecting start string: "cat" (was not found) Reason for check goes here!""") + + # Verify expect() preconditions. + # Both `patterns` and `substrs` cannot be of type string. + self.assert_expect_fails_with("any command", + dict(patterns="some substring"), + "patterns must be a collection of strings") + self.assert_expect_fails_with("any command", + dict(substrs="some substring"), + "substrs must be a collection of strings") + # Prevent `self.expect("cmd", "substr")` + self.assert_expect_fails_with("any command", + dict(msg="some substring"), + "expect() missing a matcher argument") + # Prevent `self.expect("cmd", "msg", "substr")` + self.assert_expect_fails_with("any command", + dict(msg="a message", patterns="some substring"), + "must be a collection of strings") diff --git a/lldb/test/API/commands/frame/diagnose/bad-reference/TestBadReference.py b/lldb/test/API/commands/frame/diagnose/bad-reference/TestBadReference.py index 737b297ed76b6..2ed417be87813 100644 --- a/lldb/test/API/commands/frame/diagnose/bad-reference/TestBadReference.py +++ b/lldb/test/API/commands/frame/diagnose/bad-reference/TestBadReference.py @@ -22,4 +22,5 @@ def test_bad_reference(self): self.runCmd("run", RUN_SUCCEEDED) self.expect("thread list", "Thread should be stopped", substrs=['stopped']) - self.expect("frame diagnose", "Crash diagnosis was accurate", "f->b") + self.expect("frame diagnose", "Crash diagnosis was accurate", + substrs=["f->b"]) diff --git a/lldb/test/API/commands/frame/diagnose/complicated-expression/TestComplicatedExpression.py b/lldb/test/API/commands/frame/diagnose/complicated-expression/TestComplicatedExpression.py index 277fafd14b574..c1b0a0b61f47f 100644 --- a/lldb/test/API/commands/frame/diagnose/complicated-expression/TestComplicatedExpression.py +++ b/lldb/test/API/commands/frame/diagnose/complicated-expression/TestComplicatedExpression.py @@ -25,4 +25,4 @@ def test_diagnose_dereference_argument(self): self.expect( "frame diagnose", "Crash diagnosis was accurate", - "f->b->d") + substrs=["f->b->d"]) diff --git a/lldb/test/API/commands/frame/diagnose/dereference-argument/TestDiagnoseDereferenceArgument.py b/lldb/test/API/commands/frame/diagnose/dereference-argument/TestDiagnoseDereferenceArgument.py index 5d5b3a0cf17fb..e6f222a89c62a 100644 --- a/lldb/test/API/commands/frame/diagnose/dereference-argument/TestDiagnoseDereferenceArgument.py +++ b/lldb/test/API/commands/frame/diagnose/dereference-argument/TestDiagnoseDereferenceArgument.py @@ -25,4 +25,4 @@ def test_diagnose_dereference_argument(self): self.expect( "frame diagnose", "Crash diagnosis was accurate", - "f->b->d") + substrs=["f->b->d"]) diff --git a/lldb/test/API/commands/frame/diagnose/dereference-this/TestDiagnoseDereferenceThis.py b/lldb/test/API/commands/frame/diagnose/dereference-this/TestDiagnoseDereferenceThis.py index b1f6b2c87943b..e5d528fe3422b 100644 --- 
a/lldb/test/API/commands/frame/diagnose/dereference-this/TestDiagnoseDereferenceThis.py +++ b/lldb/test/API/commands/frame/diagnose/dereference-this/TestDiagnoseDereferenceThis.py @@ -25,4 +25,4 @@ def test_diagnose_dereference_this(self): self.expect( "frame diagnose", "Crash diagnosis was accurate", - "this->a") + substrs=["this->a"]) diff --git a/lldb/test/API/commands/frame/diagnose/inheritance/TestDiagnoseInheritance.py b/lldb/test/API/commands/frame/diagnose/inheritance/TestDiagnoseInheritance.py index 2e5a5f19b940f..f006db5219a46 100644 --- a/lldb/test/API/commands/frame/diagnose/inheritance/TestDiagnoseInheritance.py +++ b/lldb/test/API/commands/frame/diagnose/inheritance/TestDiagnoseInheritance.py @@ -22,4 +22,5 @@ def test_diagnose_inheritance(self): self.runCmd("run", RUN_SUCCEEDED) self.expect("thread list", "Thread should be stopped", substrs=['stopped']) - self.expect("frame diagnose", "Crash diagnosis was accurate", "d") + self.expect("frame diagnose", "Crash diagnosis was accurate", + substrs=["d"]) diff --git a/lldb/test/API/commands/frame/diagnose/local-variable/TestLocalVariable.py b/lldb/test/API/commands/frame/diagnose/local-variable/TestLocalVariable.py index 7e60467bf4258..4fcfa77666c0f 100644 --- a/lldb/test/API/commands/frame/diagnose/local-variable/TestLocalVariable.py +++ b/lldb/test/API/commands/frame/diagnose/local-variable/TestLocalVariable.py @@ -22,4 +22,5 @@ def test_local_variable(self): self.runCmd("run", RUN_SUCCEEDED) self.expect("thread list", "Thread should be stopped", substrs=['stopped']) - self.expect("frame diagnose", "Crash diagnosis was accurate", "myInt") + self.expect("frame diagnose", "Crash diagnosis was accurate", + substrs=["myInt"]) diff --git a/lldb/test/API/commands/frame/diagnose/virtual-method-call/TestDiagnoseDereferenceVirtualMethodCall.py b/lldb/test/API/commands/frame/diagnose/virtual-method-call/TestDiagnoseDereferenceVirtualMethodCall.py index 802bf1bd29d6e..d29d69d732293 100644 --- a/lldb/test/API/commands/frame/diagnose/virtual-method-call/TestDiagnoseDereferenceVirtualMethodCall.py +++ b/lldb/test/API/commands/frame/diagnose/virtual-method-call/TestDiagnoseDereferenceVirtualMethodCall.py @@ -22,4 +22,5 @@ def test_diagnose_virtual_method_call(self): self.runCmd("run", RUN_SUCCEEDED) self.expect("thread list", "Thread should be stopped", substrs=['stopped']) - self.expect("frame diagnose", "Crash diagnosis was accurate", "foo") + self.expect("frame diagnose", "Crash diagnosis was accurate", + substrs=["foo"]) diff --git a/lldb/test/API/commands/settings/TestSettings.py b/lldb/test/API/commands/settings/TestSettings.py index fc872f6240fef..180d45e4e9342 100644 --- a/lldb/test/API/commands/settings/TestSettings.py +++ b/lldb/test/API/commands/settings/TestSettings.py @@ -461,15 +461,15 @@ def test_settings_with_quotes(self): # if they are provided self.runCmd("settings set thread-format 'abc def' ") self.expect("settings show thread-format", - 'thread-format (format-string) = "abc def"') + startstr='thread-format (format-string) = "abc def"') self.runCmd('settings set thread-format "abc def" ') self.expect("settings show thread-format", - 'thread-format (format-string) = "abc def"') + startstr='thread-format (format-string) = "abc def"') # Make sure when no quotes are provided that we maintain any trailing # spaces self.runCmd('settings set thread-format abc def ') self.expect("settings show thread-format", - 'thread-format (format-string) = "abc def "') + startstr='thread-format (format-string) = "abc def "') 
self.runCmd('settings clear thread-format') def test_settings_with_trailing_whitespace(self): diff --git a/lldb/test/API/driver/batch_mode/TestBatchMode.py b/lldb/test/API/driver/batch_mode/TestBatchMode.py index df6cc87d3c343..e5364a460f9ce 100644 --- a/lldb/test/API/driver/batch_mode/TestBatchMode.py +++ b/lldb/test/API/driver/batch_mode/TestBatchMode.py @@ -44,7 +44,7 @@ def test_batch_mode_run_crash(self): child.expect_exact('(char *) touch_me_not') # Then we should have a live prompt: self.expect_prompt() - self.expect("frame variable touch_me_not", substrs='(char *) touch_me_not') + self.expect("frame variable touch_me_not", substrs=['(char *) touch_me_not']) @expectedFlakeyFreeBSD("llvm.org/pr25172 fails rarely on the buildbot") def test_batch_mode_run_exit(self): diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py b/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py index e21f1c4553f5d..a03e2addbe975 100644 --- a/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py @@ -21,7 +21,7 @@ def testBreakpointByLineAndColumn(self): main_c = lldb.SBFileSpec("main.c") _, _, _, breakpoint = lldbutil.run_to_line_breakpoint(self, main_c, 11, 50) - self.expect("fr v did_call", substrs='1') + self.expect("fr v did_call", substrs=['1']) in_then = False for i in range(breakpoint.GetNumLocations()): b_loc = breakpoint.GetLocationAtIndex(i).GetAddress().GetLineEntry() @@ -35,7 +35,7 @@ def testBreakpointByLine(self): self.build() main_c = lldb.SBFileSpec("main.c") _, _, _, breakpoint = lldbutil.run_to_line_breakpoint(self, main_c, 11) - self.expect("fr v did_call", substrs='0') + self.expect("fr v did_call", substrs=['0']) in_condition = False for i in range(breakpoint.GetNumLocations()): b_loc = breakpoint.GetLocationAtIndex(i).GetAddress().GetLineEntry() diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/cmtime/TestDataFormatterCMTime.py b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/cmtime/TestDataFormatterCMTime.py index 8943f8313f3c7..d08ab16401fce 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/cmtime/TestDataFormatterCMTime.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/cmtime/TestDataFormatterCMTime.py @@ -48,6 +48,6 @@ def test_nsindexpath_with_run_command(self): self.expect( 'frame variable t4', substrs=['10 seconds', 'value = 10', 'timescale = 1', 'epoch = 0']) - self.expect('frame variable t5', '-oo') - self.expect('frame variable t6', '+oo') - self.expect('frame variable t7', 'indefinite') + self.expect('frame variable t5', substrs=['+oo']) + self.expect('frame variable t6', substrs=['-oo']) + self.expect('frame variable t7', substrs=['indefinite']) diff --git a/lldb/test/API/lang/cpp/constructors/TestCppConstructors.py b/lldb/test/API/lang/cpp/constructors/TestCppConstructors.py index e330093a84678..3e368d7125a9a 100644 --- a/lldb/test/API/lang/cpp/constructors/TestCppConstructors.py +++ b/lldb/test/API/lang/cpp/constructors/TestCppConstructors.py @@ -19,7 +19,7 @@ def test_constructors(self): self.expect_expr("ClassWithDeletedDefaultCtor(7).value", result_type="int", result_value="7") # FIXME: It seems we try to call the non-existent default constructor here which is wrong. 
- self.expect("expr ClassWithDefaultedCtor().foo()", error=True, substrs="Couldn't lookup symbols:") + self.expect("expr ClassWithDefaultedCtor().foo()", error=True, substrs=["Couldn't lookup symbols:"]) # FIXME: Calling deleted constructors should fail before linking. self.expect("expr ClassWithDeletedCtor(1).value", error=True, substrs=["Couldn't lookup symbols:"]) diff --git a/lldb/test/API/macosx/macCatalyst/TestMacCatalyst.py b/lldb/test/API/macosx/macCatalyst/TestMacCatalyst.py index 555d5a13b5559..520e92790bd26 100644 --- a/lldb/test/API/macosx/macCatalyst/TestMacCatalyst.py +++ b/lldb/test/API/macosx/macCatalyst/TestMacCatalyst.py @@ -24,8 +24,8 @@ def test_macabi(self): self.expect("image list -t -b", patterns=[self.getArchitecture() + r'.*-apple-ios.*-macabi a\.out']) - self.expect("fr v s", "Hello macCatalyst") - self.expect("p s", "Hello macCatalyst") + self.expect("fr v s", substrs=["Hello macCatalyst"]) + self.expect("p s", substrs=["Hello macCatalyst"]) self.check_debugserver(log) def check_debugserver(self, log): diff --git a/lldb/test/API/types/TestRecursiveTypes.py b/lldb/test/API/types/TestRecursiveTypes.py index 69194cfb96aed..8e84a052a22b7 100644 --- a/lldb/test/API/types/TestRecursiveTypes.py +++ b/lldb/test/API/types/TestRecursiveTypes.py @@ -50,5 +50,5 @@ def print_struct(self): self.runCmd("run", RUN_SUCCEEDED) - self.expect("print tpi", RUN_SUCCEEDED) - self.expect("print *tpi", RUN_SUCCEEDED) + self.expect("print tpi") + self.expect("print *tpi") From db80cc397e7ec4aeefc7322e6a240651506e15a6 Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Mon, 5 Oct 2020 12:43:50 -0700 Subject: [PATCH 059/321] [CodeGen][MachineSched] Fixup function name typo. NFC --- llvm/lib/CodeGen/MachineScheduler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index b6d0d9a74ac14..6d24e3b760968 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -3398,13 +3398,13 @@ ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { return DAG; } -static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) { +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { return createGenericSchedLive(C); } static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", - createConveringSched); + createConvergingSched); //===----------------------------------------------------------------------===// // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. 
From 59127de2435b54c6b1621f5b3e7fabc3e79ff248 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 5 Oct 2020 22:52:32 +0300 Subject: [PATCH 060/321] [NFC][GCOV] Fix build: there's no `llvm::stable_partition()` wrapper --- llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index fad1ba093acf6..527644a69d915 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -884,8 +884,9 @@ bool GCOVProfiler::emitProfileNotes( return E->Removed || (!E->InMST && !E->Place); }); const size_t Measured = - llvm::stable_partition( - MST.AllEdges, [](std::unique_ptr<Edge> &E) { return E->Place; }) - + std::stable_partition( + MST.AllEdges.begin(), MST.AllEdges.end(), + [](std::unique_ptr<Edge> &E) { return E->Place; }) - MST.AllEdges.begin(); for (size_t I : llvm::seq(0, Measured)) { Edge &E = *MST.AllEdges[I]; From a2cc8833683dd124cf2ee96f6d17f7f835da1fc8 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 3 Oct 2020 14:50:20 -0700 Subject: [PATCH 061/321] [CUDA] Don't call __cudaRegisterVariable on C++17 inline variables D17779: host-side shadow variables of external declarations of device-side global variables have internal linkage and are referenced by `__cuda_register_globals`. nvcc from CUDA 11 does not allow `__device__ inline` or `__device__ constexpr` (C++17 inline variables) but clang has incorrectly supported them for a while: ``` error: A __device__ variable cannot be marked constexpr error: An inline __device__/__constant__/__managed__ variable must have internal linkage when the program is compiled in whole program mode (-rdc=false) ``` If such a variable (which has a comdat group) is discarded (a copy from another translation unit is prevailing and selected), accessing the variable from outside the section group (`__cuda_register_globals`) is a violation of the ELF specification and will be rejected by linkers: > A symbol table entry with STB_LOCAL binding that is defined relative to one of a group's sections, and that is contained in a symbol table section that is not part of the group, must be discarded if the group members are discarded. References to this symbol table entry from outside the group are not allowed. As a workaround, don't register such inline variables for now. (If we register the variables in all TUs, we will keep multiple instances of the shadow and break the C++ semantics for inline variables). We should reject such variables in Sema but our internal users need some time to migrate. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D88786 --- clang/lib/CodeGen/CodeGenModule.cpp | 7 ++++++- clang/test/CodeGenCUDA/device-stub.cu | 20 +++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index c3457865c0b05..93b49ec981e82 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4129,7 +4129,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Shadow variables and their properties must be registered with CUDA // runtime. Skip Extern global variables, which will be registered in // the TU where they are defined. - if (!D->hasExternalStorage()) + // + // Don't register a C++17 inline variable.
The local symbol can be + discarded and referencing a discarded local symbol from outside the + comdat (__cuda_register_globals) is disallowed by the ELF spec. + // TODO: Reject __device__ constexpr and __device__ inline in Sema. + if (!D->hasExternalStorage() && !D->isInline()) getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(), D->hasAttr<CUDAConstantAttr>()); } else if (D->hasAttr<CUDASharedAttr>()) { diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index ca21116fc989e..16bbef6cfad53 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -29,6 +29,10 @@ // RUN: -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \ // RUN: | FileCheck %s -allow-deprecated-dag-overlap \ // RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -std=c++17 \ +// RUN: -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \ +// RUN: | FileCheck %s -allow-deprecated-dag-overlap \ +// RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW,LNX_17 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -target-sdk-version=9.2 -o - \ // RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN @@ -91,9 +95,18 @@ __device__ int ext_device_var_def = 1; // WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef __constant__ int ext_constant_var_def = 2; +#if __cplusplus > 201402L +/// FIXME: Reject __device__ constexpr and inline variables in Sema. +// LNX_17: @inline_var = internal global i32 undef, comdat, align 4{{$}} +// LNX_17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, align 4{{$}} +__device__ inline int inline_var = 3; +struct C { + __device__ static constexpr int member_inline_var = 4; +}; +#endif void use_pointers() { - int *p; + const int *p; p = &device_var; p = &constant_var; p = &shared_var; @@ -101,6 +114,10 @@ void use_pointers() { p = &ext_device_var; p = &ext_constant_var; p = &ext_host_var; +#if __cplusplus > 201402L + p = &inline_var; + p = &C::member_inline_var; +#endif } // Make sure that all parts of GPU code init/cleanup are there: @@ -185,6 +202,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0 +// LNX_17-NOT: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var // ALL: ret void // Test that we've built a constructor. From 19e86336efa75456469a2a3491fc58e65af6bd0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sun, 4 Oct 2020 01:19:41 +0300 Subject: [PATCH 062/321] [LLD] [COFF] Fix parsing version numbers with leading zeros Parse the components as decimal, instead of deducing the base from the string. This avoids ambiguity if the second number contains leading zeros, which previously were parsed as indicating an octal number. MS link.exe doesn't support hexadecimal numbers in the version numbers, in either /version or /subsystem.
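To make the octal pitfall concrete, here is a hedged standalone sketch (illustration only, not part of the patch) using the same StringRef::getAsInteger call the code relies on: with radix 0 the base is deduced from the string, so the leading zero in a component like "09" selects octal and the parse fails on '9', while an explicit radix of 10 yields the intended value.

#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Minor = 0;
  // Radix 0 auto-senses the base: "09" looks octal, '9' is not an octal
  // digit, and getAsInteger() signals failure by returning true.
  assert(llvm::StringRef("09").getAsInteger(0, Minor));
  // An explicit radix of 10 parses "09" as the decimal value 9.
  assert(!llvm::StringRef("09").getAsInteger(10, Minor));
  assert(Minor == 9);
  return 0;
}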
Differential Revision: https://reviews.llvm.org/D88801 --- lld/COFF/DriverUtils.cpp | 4 ++-- lld/test/COFF/subsystem.test | 4 ++++ lld/test/COFF/version.test | 6 ++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index de78359bb4460..d4449709e1b7a 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -88,10 +88,10 @@ void parseNumbers(StringRef arg, uint64_t *addr, uint64_t *size) { void parseVersion(StringRef arg, uint32_t *major, uint32_t *minor) { StringRef s1, s2; std::tie(s1, s2) = arg.split('.'); - if (s1.getAsInteger(0, *major)) + if (s1.getAsInteger(10, *major)) fatal("invalid number: " + s1); *minor = 0; - if (!s2.empty() && s2.getAsInteger(0, *minor)) + if (!s2.empty() && s2.getAsInteger(10, *minor)) fatal("invalid number: " + s2); } diff --git a/lld/test/COFF/subsystem.test b/lld/test/COFF/subsystem.test index 5c38390c71f95..a43035eb396e8 100644 --- a/lld/test/COFF/subsystem.test +++ b/lld/test/COFF/subsystem.test @@ -12,6 +12,10 @@ CHECK1: Subsystem: IMAGE_SUBSYSTEM_WINDOWS_GUI # RUN: %p/Inputs/ret42.obj # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=CHECK2 %s +# RUN: lld-link /entry:main /out:%t.exe /subsystem:windows,8.09 \ +# RUN: %p/Inputs/ret42.obj +# RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=CHECK2 %s + CHECK2: MajorOperatingSystemVersion: 8 CHECK2: MinorOperatingSystemVersion: 9 CHECK2: MajorSubsystemVersion: 8 diff --git a/lld/test/COFF/version.test b/lld/test/COFF/version.test index 3ec12e3ba74e4..ba1f8638ab4f7 100644 --- a/lld/test/COFF/version.test +++ b/lld/test/COFF/version.test @@ -17,3 +17,9 @@ CHECK1: MinorImageVersion: 0 CHECK2: MajorImageVersion: 11 CHECK2: MinorImageVersion: 22 + +# RUN: lld-link /out:%t.exe /entry:main %t.obj /version:8.09 +# RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=CHECK3 %s + +CHECK3: MajorImageVersion: 8 +CHECK3: MinorImageVersion: 9 From 45c4c54003641f2bb225eaf305c07906451d6096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sun, 4 Oct 2020 01:29:45 +0300 Subject: [PATCH 063/321] [LLD] [COFF] Add a private option for setting the os version separately from subsystem version The MinGW driver has separate options for OS and subsystem version. Having this available in lld-link both lets the MinGW driver match GNU ld more closely and simplifies the code for merging two (potentially mismatching) arguments into one.
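In condensed form, the defaulting rule this introduces can be sketched as follows (a hypothetical free function written for illustration, not the driver's real structure): the subsystem version still seeds the OS version fields, but an explicit /osversion now overrides them independently.

#include <cstdint>

struct VersionConfig {
  uint32_t MajorOSVersion = 6, MinorOSVersion = 0;
  uint32_t MajorSubsystemVersion = 6, MinorSubsystemVersion = 0;
};

// HaveOSVersion models whether /osversion:<major>.<minor> was passed on the
// command line; Major/Minor carry its parsed value. (Names are assumptions.)
void applyOSVersion(VersionConfig &C, bool HaveOSVersion,
                    uint32_t Major, uint32_t Minor) {
  if (HaveOSVersion) {
    C.MajorOSVersion = Major;
    C.MinorOSVersion = Minor;
  } else {
    // Without an explicit /osversion, mirror the subsystem version; this
    // matches the old behavior where both fields came from /subsystem.
    C.MajorOSVersion = C.MajorSubsystemVersion;
    C.MinorOSVersion = C.MinorSubsystemVersion;
  }
}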
Differential Revision: https://reviews.llvm.org/D88802 --- lld/COFF/Config.h | 2 ++ lld/COFF/Driver.cpp | 25 +++++++++++++++++++++---- lld/COFF/Driver.h | 2 +- lld/COFF/DriverUtils.cpp | 4 +++- lld/COFF/Options.td | 6 ++++++ lld/COFF/Writer.cpp | 4 ++-- lld/test/COFF/subsystem.test | 18 ++++++++++++++++++ 7 files changed, 53 insertions(+), 8 deletions(-) diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 286b67b437a89..f0a26c19e9550 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -223,6 +223,8 @@ struct Configuration { uint32_t minorImageVersion = 0; uint32_t majorOSVersion = 6; uint32_t minorOSVersion = 0; + uint32_t majorSubsystemVersion = 6; + uint32_t minorSubsystemVersion = 0; uint32_t timestamp = 0; uint32_t functionPadMin = 0; bool dynamicBase = true; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 56717de226c29..3560f1066f290 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -408,10 +408,17 @@ void LinkerDriver::parseDirectives(InputFile *file) { case OPT_section: parseSection(arg->getValue()); break; - case OPT_subsystem: + case OPT_subsystem: { + bool gotVersion = false; parseSubsystem(arg->getValue(), &config->subsystem, - &config->majorOSVersion, &config->minorOSVersion); + &config->majorSubsystemVersion, + &config->minorSubsystemVersion, &gotVersion); + if (gotVersion) { + config->majorOSVersion = config->majorSubsystemVersion; + config->minorOSVersion = config->minorSubsystemVersion; + } break; + } // Only add flags here that link.exe accepts in // `#pragma comment(linker, "/flag")`-generated sections. case OPT_editandcontinue: @@ -1459,8 +1466,18 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) { // Handle /subsystem if (auto *arg = args.getLastArg(OPT_subsystem)) - parseSubsystem(arg->getValue(), &config->subsystem, &config->majorOSVersion, - &config->minorOSVersion); + parseSubsystem(arg->getValue(), &config->subsystem, + &config->majorSubsystemVersion, + &config->minorSubsystemVersion); + + // Handle /osversion + if (auto *arg = args.getLastArg(OPT_osversion)) { + parseVersion(arg->getValue(), &config->majorOSVersion, + &config->minorOSVersion); + } else { + config->majorOSVersion = config->majorSubsystemVersion; + config->minorOSVersion = config->minorSubsystemVersion; + } // Handle /timestamp if (llvm::opt::Arg *arg = args.getLastArg(OPT_timestamp, OPT_repro)) { diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h index 3fee9b1fe50e2..5de05a1ef3259 100644 --- a/lld/COFF/Driver.h +++ b/lld/COFF/Driver.h @@ -168,7 +168,7 @@ void parseVersion(StringRef arg, uint32_t *major, uint32_t *minor); // Parses a string in the form of "<subsystem>[,<major>[.<minor>]]". void parseSubsystem(StringRef arg, WindowsSubsystem *sys, uint32_t *major, - uint32_t *minor); + uint32_t *minor, bool *gotVersion = nullptr); void parseAlternateName(StringRef); void parseMerge(StringRef); diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index d4449709e1b7a..7de73f2cfe491 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -112,7 +112,7 @@ void parseGuard(StringRef fullArg) { // Parses a string in the form of "<subsystem>[,<major>[.<minor>]]".
void parseSubsystem(StringRef arg, WindowsSubsystem *sys, uint32_t *major, - uint32_t *minor) { + uint32_t *minor, bool *gotVersion) { StringRef sysStr, ver; std::tie(sysStr, ver) = arg.split(','); std::string sysStrLower = sysStr.lower(); @@ -132,6 +132,8 @@ void parseSubsystem(StringRef arg, WindowsSubsystem *sys, uint32_t *major, fatal("unknown subsystem: " + sysStr); if (!ver.empty()) parseVersion(ver, major, minor); + if (gotVersion) + *gotVersion = !ver.empty(); } // Parse a string of the form of "<from>=<to>". diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index d1badf0fdd2f0..d27e95f9bd600 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -9,6 +9,11 @@ class F<string name> : Flag<["/", "-", "/?", "-?"], name>; class P<string name, string help> : Joined<["/", "-", "/?", "-?"], name#":">, HelpText<help>; +// Same as P<> above, but without help texts, for private undocumented +// options. +class P_priv<string name> : + Joined<["/", "-", "/?", "-?"], name#":">; + // Boolean flag which can be suffixed by ":no". Using it unsuffixed turns the // flag on and using it suffixed by ":no" turns it off. multiclass B<string name, string help_on, string help_off> { @@ -205,6 +210,7 @@ def include_optional : Joined<["/", "-", "/?", "-?"], "includeoptional:">, def kill_at : F<"kill-at">; def lldmingw : F<"lldmingw">; def noseh : F<"noseh">; +def osversion : P_priv<"osversion">; def output_def : Joined<["/", "-", "/?", "-?"], "output-def:">; def pdb_source_path : P<"pdbsourcepath", "Base path used to make relative source file path absolute in PDB">; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index d1081b008ea40..145d517c9c0e8 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1373,8 +1373,8 @@ template <typename PEHeaderTy> void Writer::writeHeader() { pe->MinorImageVersion = config->minorImageVersion; pe->MajorOperatingSystemVersion = config->majorOSVersion; pe->MinorOperatingSystemVersion = config->minorOSVersion; - pe->MajorSubsystemVersion = config->majorOSVersion; - pe->MinorSubsystemVersion = config->minorOSVersion; + pe->MajorSubsystemVersion = config->majorSubsystemVersion; + pe->MinorSubsystemVersion = config->minorSubsystemVersion; pe->Subsystem = config->subsystem; pe->SizeOfImage = sizeOfImage; pe->SizeOfHeaders = sizeOfHeaders; diff --git a/lld/test/COFF/subsystem.test b/lld/test/COFF/subsystem.test index a43035eb396e8..e9a6966a01da9 100644 --- a/lld/test/COFF/subsystem.test +++ b/lld/test/COFF/subsystem.test @@ -30,3 +30,21 @@ CHECK3: MajorOperatingSystemVersion: 8 CHECK3: MinorOperatingSystemVersion: 9 CHECK3: MajorSubsystemVersion: 8 CHECK3: MinorSubsystemVersion: 9 + +# RUN: lld-link /entry:main /out:%t.exe /osversion:1.2 \ +# RUN: %p/Inputs/ret42.obj +# RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=CHECK4 %s + +CHECK4: MajorOperatingSystemVersion: 1 +CHECK4: MinorOperatingSystemVersion: 2 +CHECK4: MajorSubsystemVersion: 6 +CHECK4: MinorSubsystemVersion: 0 + +# RUN: lld-link /entry:main /out:%t.exe /osversion:1.2 /subsystem:default,3.4 \ +# RUN: %p/Inputs/ret42.obj +# RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=CHECK5 %s + +CHECK5: MajorOperatingSystemVersion: 1 +CHECK5: MinorOperatingSystemVersion: 2 +CHECK5: MajorSubsystemVersion: 3 +CHECK5: MinorSubsystemVersion: 4 From bc8f3b424c7785754c1de8a2f5a1e2c16a997143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sun, 4 Oct 2020 11:52:36 +0300 Subject: [PATCH 064/321] [LLD] [MinGW] Simplify handling of os/subsystem version As they can be set independently after D88802, we can get rid of a bit of extra code - simplifying the logic here before adding more
complication to it later. Differential Revision: https://reviews.llvm.org/D88803 --- lld/COFF/Config.h | 2 ++ lld/MinGW/Driver.cpp | 24 ++++++------------------ lld/test/MinGW/driver.test | 13 ++++++++----- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index f0a26c19e9550..77a08c200da29 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -221,6 +221,8 @@ struct Configuration { uint64_t heapCommit = 4096; uint32_t majorImageVersion = 0; uint32_t minorImageVersion = 0; + // If changing the default os/subsys version here, update the default in + // the MinGW driver accordingly. uint32_t majorOSVersion = 6; uint32_t minorOSVersion = 0; uint32_t majorSubsystemVersion = 6; diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index af7ecb8801e9d..d976e4cc75fb6 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -214,25 +214,13 @@ bool mingw::link(ArrayRef argsArr, bool canExitEarly, if (args.hasArg(OPT_major_os_version, OPT_minor_os_version, OPT_major_subsystem_version, OPT_minor_subsystem_version)) { - auto *majOSVer = args.getLastArg(OPT_major_os_version); - auto *minOSVer = args.getLastArg(OPT_minor_os_version); - auto *majSubSysVer = args.getLastArg(OPT_major_subsystem_version); - auto *minSubSysVer = args.getLastArg(OPT_minor_subsystem_version); - if (majOSVer && majSubSysVer && - StringRef(majOSVer->getValue()) != StringRef(majSubSysVer->getValue())) - warn("--major-os-version and --major-subsystem-version set to differing " - "versions, not supported"); - if (minOSVer && minSubSysVer && - StringRef(minOSVer->getValue()) != StringRef(minSubSysVer->getValue())) - warn("--minor-os-version and --minor-subsystem-version set to differing " - "versions, not supported"); + StringRef majOSVer = args.getLastArgValue(OPT_major_os_version, "6"); + StringRef minOSVer = args.getLastArgValue(OPT_minor_os_version, "0"); + StringRef majSubSysVer = args.getLastArgValue(OPT_major_subsystem_version, "6"); + StringRef minSubSysVer = args.getLastArgValue(OPT_minor_subsystem_version, "0"); StringRef subSys = args.getLastArgValue(OPT_subs, "default"); - StringRef major = majOSVer ? majOSVer->getValue() - : majSubSysVer ? majSubSysVer->getValue() : "6"; - StringRef minor = minOSVer ? minOSVer->getValue() - : minSubSysVer ? minSubSysVer->getValue() : ""; - StringRef sep = minor.empty() ? "" : "."; - add("-subsystem:" + subSys + "," + major + sep + minor); + add("-osversion:" + majOSVer + "." + minOSVer); + add("-subsystem:" + subSys + "," + majSubSysVer + "." 
+ minSubSysVer); } else if (auto *a = args.getLastArg(OPT_subs)) { add("-subsystem:" + StringRef(a->getValue())); } diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index c9fa6ea9f97d8..fdf5a29d6d0d5 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -63,18 +63,21 @@ RUN: ld.lld -### foo.o -m i386pep -subsystem=console | FileCheck -check-prefix=S RUN: ld.lld -### foo.o -m i386pep --subsystem=console | FileCheck -check-prefix=SUBSYSTEM %s SUBSYSTEM: -subsystem:console -RUN: ld.lld -### foo.o -m i386pep --major-os-version 7 --minor-os-version 8 | FileCheck -check-prefix=SUBSYSTEM_VERSION %s +RUN: ld.lld -### foo.o -m i386pep --major-os-version 7 --minor-os-version 8 | FileCheck -check-prefix=OS_VERSION %s +RUN: ld.lld -### foo.o -m i386pep --major-os-version=7 --minor-os-version=8 | FileCheck -check-prefix=OS_VERSION %s +OS_VERSION: -osversion:7.8 -subsystem:default,6.0 + RUN: ld.lld -### foo.o -m i386pep --major-subsystem-version 7 --minor-subsystem-version 8 | FileCheck -check-prefix=SUBSYSTEM_VERSION %s -RUN: ld.lld -### foo.o -m i386pep --major-os-version=7 --minor-os-version=8 --major-subsystem-version=7 --minor-subsystem-version=8 | FileCheck -check-prefix=SUBSYSTEM_VERSION %s -SUBSYSTEM_VERSION: -subsystem:default,7.8 +RUN: ld.lld -### foo.o -m i386pep --major-subsystem-version=7 --minor-subsystem-version=8 | FileCheck -check-prefix=SUBSYSTEM_VERSION %s +SUBSYSTEM_VERSION: -osversion:6.0 -subsystem:default,7.8 RUN: ld.lld -### foo.o -m i386pep --minor-subsystem-version 8 | FileCheck -check-prefix=SUBSYSTEM_DEFAULT_MAJOR %s SUBSYSTEM_DEFAULT_MAJOR: -subsystem:default,6.8 RUN: ld.lld -### foo.o -m i386pep --major-subsystem-version 7 | FileCheck -check-prefix=SUBSYSTEM_DEFAULT_MINOR %s -SUBSYSTEM_DEFAULT_MINOR: -subsystem:default,7 +SUBSYSTEM_DEFAULT_MINOR: -subsystem:default,7.0 -RUN: ld.lld -### foo.o -m i386pep --subsystem windows --major-os-version 7 --minor-os-version 8 | FileCheck -check-prefix=SUBSYSTEM_WINDOWS %s +RUN: ld.lld -### foo.o -m i386pep --subsystem windows --major-subsystem-version 7 --minor-subsystem-version 8 | FileCheck -check-prefix=SUBSYSTEM_WINDOWS %s SUBSYSTEM_WINDOWS: -subsystem:windows,7.8 RUN: ld.lld -### foo.o -m i386pep -stack 4194304,8192 | FileCheck -check-prefix=STACK %s From 61e2f9fa2e514c053c571228639ccdb5d21fadd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sun, 4 Oct 2020 17:59:33 +0300 Subject: [PATCH 065/321] [LLD] [MinGW] Support setting the subsystem version via the subsystem argument If a version is specified both with --{major,minor}-subsystem-version and with --subsystem <name>:<version>, the one specified last (that actually sets a version) takes precedence in GNU ld; thus doing the same here. Differential Revision: https://reviews.llvm.org/D88804
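A minimal standalone illustration (assumed example values, not part of the patch) of the <name>[:<major>[.<minor>]] splitting the new code performs with StringRef::split:

#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <tuple>

int main() {
  // GNU ld accepts forms like --subsystem windows:7.8.
  llvm::StringRef Name, Ver, Major, Minor;
  std::tie(Name, Ver) = llvm::StringRef("windows:7.8").split(':');
  std::tie(Major, Minor) = Ver.split('.');
  assert(Name == "windows" && Major == "7" && Minor == "8");
  // With no ':' present, split() leaves the version part empty, so the
  // defaults from --{major,minor}-subsystem-version stay in effect.
  std::tie(Name, Ver) = llvm::StringRef("windows").split(':');
  assert(Name == "windows" && Ver.empty());
  return 0;
}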
Differential Revision: https://reviews.llvm.org/D88804 --- lld/MinGW/Driver.cpp | 40 ++++++++++++++++++++++++++++++++------ lld/test/MinGW/driver.test | 6 ++++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index d976e4cc75fb6..0a138d8a2303d 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -216,13 +216,41 @@ bool mingw::link(ArrayRef argsArr, bool canExitEarly, OPT_major_subsystem_version, OPT_minor_subsystem_version)) { StringRef majOSVer = args.getLastArgValue(OPT_major_os_version, "6"); StringRef minOSVer = args.getLastArgValue(OPT_minor_os_version, "0"); - StringRef majSubSysVer = args.getLastArgValue(OPT_major_subsystem_version, "6"); - StringRef minSubSysVer = args.getLastArgValue(OPT_minor_subsystem_version, "0"); - StringRef subSys = args.getLastArgValue(OPT_subs, "default"); + StringRef majSubSysVer = "6"; + StringRef minSubSysVer = "0"; + StringRef subSysName = "default"; + StringRef subSysVer; + // Iterate over --{major,minor}-subsystem-version and --subsystem, and pick + // the version number components from the last one of them that specifies + // a version. + for (auto *a : args.filtered(OPT_major_subsystem_version, + OPT_minor_subsystem_version, OPT_subs)) { + switch (a->getOption().getID()) { + case OPT_major_subsystem_version: + majSubSysVer = a->getValue(); + break; + case OPT_minor_subsystem_version: + minSubSysVer = a->getValue(); + break; + case OPT_subs: + std::tie(subSysName, subSysVer) = StringRef(a->getValue()).split(':'); + if (!subSysVer.empty()) { + if (subSysVer.contains('.')) + std::tie(majSubSysVer, minSubSysVer) = subSysVer.split('.'); + else + majSubSysVer = subSysVer; + } + break; + } + } add("-osversion:" + majOSVer + "." + minOSVer); - add("-subsystem:" + subSys + "," + majSubSysVer + "." + minSubSysVer); - } else if (auto *a = args.getLastArg(OPT_subs)) { - add("-subsystem:" + StringRef(a->getValue())); + add("-subsystem:" + subSysName + "," + majSubSysVer + "." + minSubSysVer); + } else if (args.hasArg(OPT_subs)) { + StringRef subSys = args.getLastArgValue(OPT_subs, "default"); + StringRef subSysName, subSysVer; + std::tie(subSysName, subSysVer) = subSys.split(':'); + StringRef sep = subSysVer.empty() ? 
"" : ","; + add("-subsystem:" + subSysName + sep + subSysVer); } if (auto *a = args.getLastArg(OPT_out_implib)) diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index fdf5a29d6d0d5..4ae8ac7f547cd 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -78,6 +78,12 @@ RUN: ld.lld -### foo.o -m i386pep --major-subsystem-version 7 | FileCheck -check SUBSYSTEM_DEFAULT_MINOR: -subsystem:default,7.0 RUN: ld.lld -### foo.o -m i386pep --subsystem windows --major-subsystem-version 7 --minor-subsystem-version 8 | FileCheck -check-prefix=SUBSYSTEM_WINDOWS %s +RUN: ld.lld -### foo.o -m i386pep --major-subsystem-version 7 --minor-subsystem-version 8 --subsystem windows | FileCheck -check-prefix=SUBSYSTEM_WINDOWS %s +RUN: ld.lld -### foo.o -m i386pep --subsystem windows:7.8 | FileCheck -check-prefix=SUBSYSTEM_WINDOWS %s +RUN: ld.lld -### foo.o -m i386pep --subsystem windows:2.3 --major-subsystem-version 7 --minor-subsystem-version 8 | FileCheck -check-prefix=SUBSYSTEM_WINDOWS %s +RUN: ld.lld -### foo.o -m i386pep --subsystem windows:7.8 | FileCheck -check-prefix=SUBSYSTEM_WINDOWS %s +RUN: ld.lld -### foo.o -m i386pep --major-subsystem-version 2 --minor-subsystem-version 3 --subsystem windows:7.8 | FileCheck -check-prefix=SUBSYSTEM_WINDOWS %s +RUN: ld.lld -### foo.o -m i386pep --major-subsystem-version 2 --minor-subsystem-version 8 --subsystem windows:7 | FileCheck -check-prefix=SUBSYSTEM_WINDOWS %s SUBSYSTEM_WINDOWS: -subsystem:windows,7.8 RUN: ld.lld -### foo.o -m i386pep -stack 4194304,8192 | FileCheck -check-prefix=STACK %s From 07cb4c013c43721565530f3be077c947804da8a6 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Thu, 1 Oct 2020 11:19:38 -0700 Subject: [PATCH 066/321] [flang] Make binary->decimal conversion buffer sizes accurate The binary values that produce the most significant decimal digits in an exact conversion are those with the least normal biased exponent (1) and all fractional bits set, not the least-valued subnormals. So the binary->decimal conversion buffer sizes were a little short, and could cause a overrun crash. Differential revision: https://reviews.llvm.org/D88688 --- flang/include/flang/Common/real.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/flang/include/flang/Common/real.h b/flang/include/flang/Common/real.h index eec29f1ca7b33..6ff9e441f2ce0 100644 --- a/flang/include/flang/Common/real.h +++ b/flang/include/flang/Common/real.h @@ -39,26 +39,25 @@ static constexpr int BitsForBinaryPrecision(int binaryPrecision) { } } -// Number of significant decimal digits in the fraction of the -// exact conversion of the least nonzero (subnormal) value -// in each type; i.e., a 128-bit quad value can be formatted -// exactly with FORMAT(E0.22981). +// Maximum number of significant decimal digits in the fraction of an +// exact conversion in each type; computed by converting the value +// with the minimum exponent (biased to 1) and all fractional bits set. 
static constexpr int MaxDecimalConversionDigits(int binaryPrecision) { switch (binaryPrecision) { case 8: - return 93; + return 96; case 11: - return 17; + return 21; case 24: - return 105; + return 112; case 53: - return 751; + return 767; case 64: - return 11495; + return 11514; case 106: - return 2 * 751; + return 2 * 767; case 113: - return 11530; + return 11563; default: return -1; } From a506a66bd90cb15885833012fb2c2b7873bff541 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 2 Oct 2020 15:50:55 -0400 Subject: [PATCH 067/321] [libc++] Fix several debug mode tests These tests were only being run when _LIBCPP_DEBUG was defined, which isn't the case by default when we run the test suite. In other words, all these debug mode tests were never being run. This commit makes sure they are run, and in some cases, extracts them into a file under test/libcxx to separate them from the Standard tests. Differential Revision: https://reviews.llvm.org/D88836 --- .../sequences/vector/db_back.pass.cpp | 14 ++----- .../sequences/vector/db_cback.pass.cpp | 14 ++----- .../sequences/vector/db_cfront.pass.cpp | 14 ++----- .../sequences/vector/db_cindex.pass.cpp | 14 ++----- .../sequences/vector/db_front.pass.cpp | 14 ++----- .../sequences/vector/db_index.pass.cpp | 14 ++----- .../sequences/vector/db_iterators_2.pass.cpp | 18 +++----- .../sequences/vector/db_iterators_3.pass.cpp | 18 +++----- .../sequences/vector/db_iterators_4.pass.cpp | 14 ++----- .../sequences/vector/db_iterators_5.pass.cpp | 14 ++----- .../sequences/vector/db_iterators_6.pass.cpp | 14 ++----- .../sequences/vector/db_iterators_7.pass.cpp | 14 ++----- .../sequences/vector/db_iterators_8.pass.cpp | 18 +++----- .../sequences/vector/pop_back_empty.pass.cpp | 6 ++- .../unord/unord.map/db_bucket.pass.cpp | 35 ++++++++++++++++ .../db_insert_hint_const_lvalue.pass.cpp | 39 +++++++++++++++++ .../unord.map/db_insert_hint_rvalue.pass.cpp | 42 +++++++++++++++++++ .../unord/unord.map/db_iterators_7.pass.cpp | 14 ++----- .../unord/unord.map/db_iterators_8.pass.cpp | 14 ++----- .../unord.map/db_local_iterators_7.pass.cpp | 14 ++----- .../unord.map/db_local_iterators_8.pass.cpp | 14 ++----- .../unord/unord.map/db_move.pass.cpp | 40 ++++++++++++++++++ .../erase_iter_db1.pass.cpp | 14 ++----- .../erase_iter_db2.pass.cpp | 14 ++----- .../erase_iter_iter_db1.pass.cpp | 14 ++----- .../erase_iter_iter_db2.pass.cpp | 14 ++----- .../erase_iter_iter_db3.pass.cpp | 14 ++----- .../erase_iter_iter_db4.pass.cpp | 14 ++----- .../unord.map.swap/db_swap_1.pass.cpp | 10 ++--- .../db_insert_hint_const_lvalue.pass.cpp | 39 +++++++++++++++++ .../db_insert_hint_rvalue.pass.cpp | 42 +++++++++++++++++++ .../unord.multimap/db_iterators_7.pass.cpp | 14 ++----- .../unord.multimap/db_iterators_8.pass.cpp | 14 ++----- .../db_local_iterators_7.pass.cpp | 14 ++----- .../db_local_iterators_8.pass.cpp | 14 ++----- .../unord/unord.multimap/db_move.pass.cpp | 40 ++++++++++++++++++ .../erase_iter_db1.pass.cpp | 14 ++----- .../erase_iter_db2.pass.cpp | 14 ++----- .../erase_iter_iter_db1.pass.cpp | 14 ++----- .../erase_iter_iter_db2.pass.cpp | 14 ++----- .../erase_iter_iter_db3.pass.cpp | 14 ++----- .../erase_iter_iter_db4.pass.cpp | 14 ++----- .../unord.multimap.swap/db_swap_1.pass.cpp | 10 ++--- .../db_insert_hint_const_lvalue.pass.cpp | 39 +++++++++++++++++ .../unord.multiset/db_iterators_7.pass.cpp | 14 ++----- .../unord.multiset/db_iterators_8.pass.cpp | 14 ++----- .../db_local_iterators_7.pass.cpp | 14 ++----- .../db_local_iterators_8.pass.cpp | 14 ++----- 
 .../unord/unord.multiset/db_move.pass.cpp     | 40 ++++++++++++++++++
 .../unord.multiset/erase_iter_db1.pass.cpp    | 14 ++-----
 .../unord.multiset/erase_iter_db2.pass.cpp    | 14 ++-----
 .../erase_iter_iter_db1.pass.cpp              | 14 ++-----
 .../erase_iter_iter_db2.pass.cpp              | 14 ++-----
 .../erase_iter_iter_db3.pass.cpp              | 14 ++-----
 .../erase_iter_iter_db4.pass.cpp              | 14 ++-----
 .../unord.multiset.swap/db_swap_1.pass.cpp    | 10 ++---
 .../db_insert_hint_const_lvalue.pass.cpp      | 39 +++++++++++++++++
 .../unord/unord.set/db_iterators_7.pass.cpp   | 14 ++-----
 .../unord/unord.set/db_iterators_8.pass.cpp   | 14 ++-----
 .../unord.set/db_local_iterators_7.pass.cpp   | 14 ++-----
 .../unord.set/db_local_iterators_8.pass.cpp   | 14 ++-----
 .../unord/unord.set/db_move.pass.cpp          | 40 ++++++++++++++++++
 .../unord/unord.set/erase_iter_db1.pass.cpp   | 14 ++-----
 .../unord/unord.set/erase_iter_db2.pass.cpp   | 14 ++-----
 .../unord.set/erase_iter_iter_db1.pass.cpp    | 14 ++-----
 .../unord.set/erase_iter_iter_db2.pass.cpp    | 14 ++-----
 .../unord.set/erase_iter_iter_db3.pass.cpp    | 14 ++-----
 .../unord.set/erase_iter_iter_db4.pass.cpp    | 14 ++-----
 .../unord.set.swap/db_swap_1.pass.cpp         | 10 ++---
 .../string.access/db_back.pass.cpp            | 14 ++-----
 .../string.access/db_cback.pass.cpp           | 14 ++-----
 .../string.access/db_cfront.pass.cpp          | 14 ++-----
 .../string.access/db_cindex.pass.cpp          | 14 ++-----
 .../string.access/db_front.pass.cpp           | 14 ++-----
 .../string.access/db_index.pass.cpp           | 14 ++-----
 .../string.iterators/db_iterators_2.pass.cpp  | 14 ++-----
 .../string.iterators/db_iterators_3.pass.cpp  | 14 ++-----
 .../string.iterators/db_iterators_4.pass.cpp  | 14 ++-----
 .../string.iterators/db_iterators_5.pass.cpp  | 14 ++-----
 .../string.iterators/db_iterators_6.pass.cpp  | 14 ++-----
 .../string.iterators/db_iterators_7.pass.cpp  | 14 ++-----
 .../string.iterators/db_iterators_8.pass.cpp  | 14 ++-----
 .../clear_and_shrink_db1.pass.cpp             | 14 ++-----
 .../string.modifiers/erase_iter_db1.pass.cpp  | 14 ++-----
 .../string.modifiers/erase_iter_db2.pass.cpp  | 14 ++-----
 .../erase_iter_iter_db1.pass.cpp              | 14 ++-----
 .../erase_iter_iter_db2.pass.cpp              | 14 ++-----
 .../erase_iter_iter_db3.pass.cpp              | 14 ++-----
 .../erase_iter_iter_db4.pass.cpp              | 14 ++-----
 .../erase_pop_back_db1.pass.cpp               | 10 ++---
 .../insert_iter_char_db1.pass.cpp             | 10 ++---
 .../insert_iter_iter_iter_db1.pass.cpp        | 38 +++++++++++++++++
 .../insert_iter_size_char_db1.pass.cpp        | 10 ++---
 .../unord/unord.map/bucket.pass.cpp           | 14 +------
 .../unord.map/unord.map.cnstr/move.pass.cpp   | 13 +-----
 .../insert_hint_const_lvalue.pass.cpp         | 19 +--------
 .../insert_hint_rvalue.pass.cpp               | 18 +-------
 .../unord.multimap.cnstr/move.pass.cpp        | 13 +-----
 .../insert_hint_const_lvalue.pass.cpp         | 19 +--------
 .../insert_hint_rvalue.pass.cpp               | 18 +-------
 .../insert_hint_const_lvalue.pass.cpp         | 19 +--------
 .../unord.multiset.cnstr/move.pass.cpp        | 13 +-----
 .../insert_hint_const_lvalue.pass.cpp         | 19 +--------
 .../unord.set/unord.set.cnstr/move.pass.cpp   | 13 +-----
 .../string_insert/iter_iter_iter.pass.cpp     | 14 -------
 105 files changed, 749 insertions(+), 1026 deletions(-)
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.map/db_bucket.pass.cpp
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_const_lvalue.pass.cpp
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_rvalue.pass.cpp
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.map/db_move.pass.cpp
 rename libcxx/test/{std => libcxx}/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp (85%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp (90%)
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_const_lvalue.pass.cpp
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_rvalue.pass.cpp
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/db_iterators_7.pass.cpp (89%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/db_iterators_8.pass.cpp (89%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp (89%)
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.multimap/db_move.pass.cpp
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp (85%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp (90%)
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.multiset/db_insert_hint_const_lvalue.pass.cpp
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/db_iterators_7.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/db_iterators_8.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp (88%)
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.multiset/db_move.pass.cpp
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/erase_iter_db1.pass.cpp (84%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/erase_iter_db2.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp (85%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp (89%)
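Before the remaining renames and the per-file diffs, here is a condensed
before/after sketch of the one mechanical pattern every diff below applies.
It is assembled from the db_back.pass.cpp diff further down and is not
itself a file in the patch; the container type, include set, and blank-line
layout vary from test to test.

// Before: the entire test was guarded on _LIBCPP_DEBUG, which the test
// suite does not define by default, so only the empty stub ever compiled
// and the debug check never actually ran.
#if _LIBCPP_DEBUG >= 1
#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))
#include <cstdlib>
#include <vector>
#include <cassert>
int main(int, char**) {
    std::vector<int> v;
    v.back();      // back() on an empty vector: the debug check exits 0
    assert(false); // only reached if the diagnostic failed to fire
}
#else
int main(int, char**) { return 0; } // what every default run executed
#endif

// After: debug mode is forced on unconditionally, and configurations whose
// deployed library lacks debug support are excluded via a lit annotation.
// UNSUPPORTED: with_system_cxx_lib=macosx
#define _LIBCPP_DEBUG 1
#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))
#include <cstdlib>
#include <vector>
#include <cassert>
int main(int, char**) {
    std::vector<int> v;
    v.back();
    assert(false);
}

The exit-code convention comes from the stubbed-out _LIBCPP_ASSERT: when
the debug check fires, the test exits 0 and the harness counts a pass,
while falling through to assert(false) flags a missing diagnostic.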
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.set/db_insert_hint_const_lvalue.pass.cpp
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/db_iterators_7.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/db_iterators_8.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/db_local_iterators_7.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/db_local_iterators_8.pass.cpp (88%)
 create mode 100644 libcxx/test/libcxx/containers/unord/unord.set/db_move.pass.cpp
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/erase_iter_db1.pass.cpp (84%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/erase_iter_db2.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp (86%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp (85%)
 rename libcxx/test/{std => libcxx}/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.access/db_back.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.access/db_cback.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.access/db_cfront.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.access/db_cindex.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.access/db_front.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.access/db_index.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.iterators/db_iterators_2.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.iterators/db_iterators_3.pass.cpp (87%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.iterators/db_iterators_4.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.iterators/db_iterators_5.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.iterators/db_iterators_6.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.iterators/db_iterators_7.pass.cpp (88%)
 rename libcxx/test/{std => libcxx}/strings/basic.string/string.iterators/db_iterators_8.pass.cpp (87%)
 create mode 100644 libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_iter_iter_db1.pass.cpp

diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_back.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_back.pass.cpp
index 3a35a086b636c..60056d9824071 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/db_back.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/db_back.pass.cpp
@@ -10,8 +10,10 @@
 
 // Call back() on empty container.
 
-#if _LIBCPP_DEBUG >= 1
+// This test requires debug mode, which the library on macOS doesn't have.
+// UNSUPPORTED: with_system_cxx_lib=macosx
 
+#define _LIBCPP_DEBUG 1
 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))
 
 #include <vector>
@@ -46,13 +48,3 @@ int main(int, char**)
     }
 #endif
 }
-
-#else
-
-int main(int, char**)
-{
-
-  return 0;
-}
-
-#endif
diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_cback.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_cback.pass.cpp
index 1c516ba57d960..d038e2987e105 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/db_cback.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/db_cback.pass.cpp
@@ -10,8 +10,10 @@
 
 // Call back() on empty const container.
 
-#if _LIBCPP_DEBUG >= 1
+// This test requires debug mode, which the library on macOS doesn't have.
+// UNSUPPORTED: with_system_cxx_lib=macosx
 
+#define _LIBCPP_DEBUG 1
 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))
 
 #include <vector>
@@ -42,13 +44,3 @@ int main(int, char**)
     }
 #endif
 }
-
-#else
-
-int main(int, char**)
-{
-
-  return 0;
-}
-
-#endif
diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_cfront.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_cfront.pass.cpp
index 1dc7211f20845..7175a0930043d 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/db_cfront.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/db_cfront.pass.cpp
@@ -10,8 +10,10 @@
 
 // Call front() on empty const container.
 
-#if _LIBCPP_DEBUG >= 1
+// This test requires debug mode, which the library on macOS doesn't have.
+// UNSUPPORTED: with_system_cxx_lib=macosx
 
+#define _LIBCPP_DEBUG 1
 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))
 
 #include <vector>
@@ -42,13 +44,3 @@ int main(int, char**)
     }
 #endif
 }
-
-#else
-
-int main(int, char**)
-{
-
-  return 0;
-}
-
-#endif
diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_cindex.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_cindex.pass.cpp
index ceab50a86b9a0..9c094b90afb43 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/db_cindex.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/db_cindex.pass.cpp
@@ -10,8 +10,10 @@
 
 // Index const vector out of bounds.
 
-#if _LIBCPP_DEBUG >= 1
+// This test requires debug mode, which the library on macOS doesn't have.
+// UNSUPPORTED: with_system_cxx_lib=macosx
 
+#define _LIBCPP_DEBUG 1
 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))
 
 #include <vector>
@@ -44,13 +46,3 @@ int main(int, char**)
     }
 #endif
 }
-
-#else
-
-int main(int, char**)
-{
-
-  return 0;
-}
-
-#endif
diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_front.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_front.pass.cpp
index a4aafcaefb7ee..b68fdf829371f 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/db_front.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/db_front.pass.cpp
@@ -10,8 +10,10 @@
 
 // Call front() on empty container.
 
-#if _LIBCPP_DEBUG >= 1
+// This test requires debug mode, which the library on macOS doesn't have.
+// UNSUPPORTED: with_system_cxx_lib=macosx
 
+#define _LIBCPP_DEBUG 1
 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))
 
 #include <vector>
@@ -46,13 +48,3 @@ int main(int, char**)
     }
 #endif
 }
-
-#else
-
-int main(int, char**)
-{
-
-  return 0;
-}
-
-#endif
diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_index.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_index.pass.cpp
index a17ba27421100..3796969775f64 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/db_index.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/db_index.pass.cpp
@@ -10,8 +10,10 @@
 
 // Index vector out of bounds.
-#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -46,13 +48,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_2.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_2.pass.cpp index 975b5e951158e..b1a1c5aef26a7 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_2.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_2.pass.cpp @@ -10,8 +10,10 @@ // Compare iterators from different containers with <. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -30,7 +32,7 @@ int main(int, char**) typedef std::vector C; C c1; C c2; - bool b = c1.begin() < c2.begin(); + bool b = c1.begin() < c2.begin(); (void)b; assert(false); } #if TEST_STD_VER >= 11 @@ -39,18 +41,8 @@ int main(int, char**) typedef std::vector> C; C c1; C c2; - bool b = c1.begin() < c2.begin(); + bool b = c1.begin() < c2.begin(); (void)b; assert(false); } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_3.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_3.pass.cpp index 0dcd6e7f240ae..45e6b2641d395 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_3.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_3.pass.cpp @@ -10,8 +10,10 @@ // Subtract iterators from different containers. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -30,7 +32,7 @@ int main(int, char**) typedef std::vector C; C c1; C c2; - int i = c1.begin() - c2.begin(); + int i = c1.begin() - c2.begin(); (void)i; assert(false); } #if TEST_STD_VER >= 11 @@ -39,18 +41,8 @@ int main(int, char**) typedef std::vector> C; C c1; C c2; - int i = c1.begin() - c2.begin(); + int i = c1.begin() - c2.begin(); (void)i; assert(false); } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_4.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_4.pass.cpp index 8d048f2fd2781..ae62fabdbdaf2 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_4.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_4.pass.cpp @@ -10,8 +10,10 @@ // Index iterator out of bounds. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -46,13 +48,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_5.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_5.pass.cpp index 19060da3d1a84..330e8dd3210d4 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_5.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_5.pass.cpp @@ -10,8 +10,10 @@ // Add to iterator out of bounds. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -50,13 +52,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_6.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_6.pass.cpp index 13156c22fe38b..97b406f5338fe 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_6.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_6.pass.cpp @@ -10,8 +10,10 @@ // Decrement iterator prior to begin. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -48,13 +50,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_7.pass.cpp index 943c5209b0d51..7dbee2134a59f 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_7.pass.cpp @@ -10,8 +10,10 @@ // Increment iterator past end. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -48,13 +50,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_8.pass.cpp index 39f26f66801af..0754aaee597ee 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_8.pass.cpp @@ -10,8 +10,10 @@ // Dereference non-dereferenceable iterator. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -30,7 +32,7 @@ int main(int, char**) typedef std::vector C; C c(1); C::iterator i = c.end(); - T j = *i; + T j = *i; (void)j; assert(false); } #if TEST_STD_VER >= 11 @@ -39,18 +41,8 @@ int main(int, char**) typedef std::vector> C; C c(1); C::iterator i = c.end(); - T j = *i; + T j = *i; (void)j; assert(false); } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp index b35c6dbff6a8d..32ab5f65ddc15 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp @@ -10,6 +10,10 @@ // pop_back() more than the number of elements in a vector +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -25,5 +29,5 @@ int main(int, char**) { v.pop_back(); std::exit(1); - return 0; + return 0; } diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_bucket.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_bucket.pass.cpp new file mode 100644 index 0000000000000..242b43912fb06 --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_bucket.pass.cpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// size_type bucket(const key_type& __k) const; + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) + +#include +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + typedef std::unordered_map C; + C c; + (void) c.bucket(3); + assert(false); + } + + return 0; +} diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_const_lvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_const_lvalue.pass.cpp new file mode 100644 index 0000000000000..5c6c51fa7ade5 --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_const_lvalue.pass.cpp @@ -0,0 +1,39 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// iterator insert(const_iterator p, const value_type& x); + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) + +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + typedef std::unordered_map C; + typedef C::iterator R; + typedef C::value_type P; + C c; + C c2; + C::const_iterator e = c2.end(); + P v(3.5, 3); + R r = c.insert(e, v); + assert(false); + } + + return 0; +} diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_rvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_rvalue.pass.cpp new file mode 100644 index 0000000000000..83ac37948fa3e --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_rvalue.pass.cpp @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template ::value>::type> +// iterator insert(const_iterator p, P&& x); + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) + +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + typedef std::unordered_map C; + typedef C::iterator R; + typedef C::value_type P; + C c; + C c2; + C::const_iterator e = c2.end(); + R r = c.insert(e, P(3.5, 3)); + assert(false); + } + + return 0; +} diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_7.pass.cpp index 9ff6bafc3adef..513c56034a685 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_7.pass.cpp @@ -10,8 +10,10 @@ // Increment iterator past end. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -50,13 +52,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_8.pass.cpp index ef383aa9f5d97..f12ba00cabc8d 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_8.pass.cpp @@ -10,8 +10,10 @@ // Dereference non-dereferenceable iterator. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -46,13 +48,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_7.pass.cpp index 5c2b4024ae9a7..19b0ee7c2674c 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_7.pass.cpp @@ -10,8 +10,10 @@ // Increment local_iterator past end. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -47,13 +49,3 @@ int main(int, char**) #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_8.pass.cpp index 8e76f1bda459c..d696d54336a99 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_8.pass.cpp @@ -10,8 +10,10 @@ // Dereference non-dereferenceable iterator. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -44,13 +46,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_move.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_move.pass.cpp new file mode 100644 index 0000000000000..5ae9a1403c493 --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_move.pass.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// unordered_map(unordered_map&& u); + +// UNSUPPORTED: c++03 + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) + +#include +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + std::unordered_map s1 = {{1, 1}, {2, 2}, {3, 3}}; + std::unordered_map::iterator i = s1.begin(); + std::pair k = *i; + std::unordered_map s2 = std::move(s1); + assert(*i == k); + s2.erase(i); + assert(s2.size() == 2); + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp similarity index 85% rename from libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp index 9ed47578d0fe6..a3873ec8c5e99 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator position) with end() -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -30,13 +32,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp index 0b44f0be2e961..7aa39f2000cbe 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator position) with iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -33,13 +35,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp similarity index 87% rename from libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp index 6ca0f7a615ad2..841b90073c551 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with first iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -32,13 +34,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp similarity index 87% rename from libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp index 7c714abc69e03..b124a94b7f593 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with second iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -32,13 +34,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp similarity index 87% rename from libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp index 9061bb02a6438..c61cfde6620c4 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with both iterators from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -32,13 +34,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp index 0edd67a585188..4a485c3ce7340 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with a bad range -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -31,13 +33,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp similarity index 90% rename from libcxx/test/std/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp index 6a80c09f9a7a9..3e01d659417fe 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp @@ -14,9 +14,11 @@ // void swap(unordered_map& x, unordered_map& y); -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) -#endif #include #include @@ -25,7 +27,6 @@ int main(int, char**) { -#if _LIBCPP_DEBUG >= 1 { typedef std::pair P; P a1[] = {P(1, 1), P(3, 3), P(7, 7), P(9, 9), P(10, 10)}; @@ -41,7 +42,6 @@ int main(int, char**) c1.erase(i1); assert(false); } -#endif - return 0; + return 0; } diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_const_lvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_const_lvalue.pass.cpp new file mode 100644 index 0000000000000..de8b504f10e97 --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_const_lvalue.pass.cpp @@ -0,0 +1,39 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// iterator insert(const_iterator p, const value_type& x); + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) + +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + typedef std::unordered_multimap C; + typedef C::iterator R; + typedef C::value_type P; + C c; + C c2; + C::const_iterator e = c2.end(); + P v(3.5, 3); + R r = c.insert(e, v); + assert(false); + } + + return 0; +} diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_rvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_rvalue.pass.cpp new file mode 100644 index 0000000000000..47bfb4b87924a --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_rvalue.pass.cpp @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template ::value>::type> +// iterator insert(const_iterator p, P&& x); + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) + +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + typedef std::unordered_multimap C; + typedef C::iterator R; + typedef C::value_type P; + C c; + C c2; + C::const_iterator e = c2.end(); + R r = c.insert(e, P(3.5, 3)); + assert(false); + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/db_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_7.pass.cpp similarity index 89% rename from libcxx/test/std/containers/unord/unord.multimap/db_iterators_7.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_7.pass.cpp index 871bd791b9373..117883020f57b 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_7.pass.cpp @@ -10,8 +10,10 @@ // Increment iterator past end. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -50,13 +52,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multimap/db_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_8.pass.cpp similarity index 89% rename from libcxx/test/std/containers/unord/unord.multimap/db_iterators_8.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_8.pass.cpp index 821de2553a591..a5861fb8bad96 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_8.pass.cpp @@ -10,8 +10,10 @@ // Dereference non-dereferenceable iterator. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -46,13 +48,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp similarity index 88% rename from libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp index 9da1b6113be02..a817f8108c980 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp @@ -10,8 +10,10 @@ // Increment local_iterator past end. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -47,13 +49,3 @@ int main(int, char**) #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp similarity index 89% rename from libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp index dd56647d140ec..9ac363e096805 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp @@ -10,8 +10,10 @@ // Dereference non-dereferenceable iterator. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -44,13 +46,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/db_move.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_move.pass.cpp new file mode 100644 index 0000000000000..3b1f23ac91a46 --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_move.pass.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// unordered_multimap(unordered_multimap&& u); + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) + +#include +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + std::unordered_multimap s1 = {{1, 1}, {2, 2}, {3, 3}}; + std::unordered_multimap::iterator i = s1.begin(); + std::pair k = *i; + std::unordered_multimap s2 = std::move(s1); + assert(*i == k); + s2.erase(i); + assert(s2.size() == 2); + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp similarity index 85% rename from libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp index 940db985cbc7e..da9362270a64f 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator position) with end() -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -30,13 +32,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp similarity index 87% rename from libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp index 4da7e60a6fa5a..0e99ca4cefa7c 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator position) with iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -33,13 +35,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp similarity index 87% rename from libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp index b14c85dabd1a4..f8412d94e4560 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with first iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -32,13 +34,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp similarity index 87% rename from libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp index 03be37ae73227..a028e11390d91 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with second iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. 
+// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -32,13 +34,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp similarity index 87% rename from libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp index cd1892ee17d6e..5506af55707d8 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with both iterators from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -32,13 +34,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp index 7ecf36646db3d..97119b843b2cc 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with a bad range -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -31,13 +33,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp similarity index 90% rename from libcxx/test/std/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp index 65fb5ae37860a..73d9dc311fb2f 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp @@ -14,9 +14,11 @@ // void swap(unordered_multimap& x, unordered_multimap& y); -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. 
+// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) -#endif #include #include @@ -25,7 +27,6 @@ int main(int, char**) { -#if _LIBCPP_DEBUG >= 1 { typedef std::pair P; P a1[] = {P(1, 1), P(3, 3), P(7, 7), P(9, 9), P(10, 10)}; @@ -41,7 +42,6 @@ int main(int, char**) c1.erase(i1); assert(false); } -#endif - return 0; + return 0; } diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/db_insert_hint_const_lvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_insert_hint_const_lvalue.pass.cpp new file mode 100644 index 0000000000000..de604c1aca3cd --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_insert_hint_const_lvalue.pass.cpp @@ -0,0 +1,39 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// iterator insert(const_iterator p, const value_type& x); + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) + +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + typedef std::unordered_multiset C; + typedef C::iterator R; + typedef C::value_type P; + C c; + C c2; + C::const_iterator e = c2.end(); + P v(3.5); + R r = c.insert(e, v); + assert(false); + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.multiset/db_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_7.pass.cpp similarity index 88% rename from libcxx/test/std/containers/unord/unord.multiset/db_iterators_7.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_7.pass.cpp index 0b401e66d708b..89d3a5737d782 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_7.pass.cpp @@ -10,8 +10,10 @@ // Increment iterator past end. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -48,13 +50,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multiset/db_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_8.pass.cpp similarity index 88% rename from libcxx/test/std/containers/unord/unord.multiset/db_iterators_8.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_8.pass.cpp index 88280e6353d3d..579bd84c97191 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_8.pass.cpp @@ -10,8 +10,10 @@ // Dereference non-dereferenceable iterator. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -44,13 +46,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp similarity index 88% rename from libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp index 5ec15771c49ce..c85ed1da83551 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp @@ -10,8 +10,10 @@ // Increment local_iterator past end. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -47,13 +49,3 @@ int main(int, char**) #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp similarity index 88% rename from libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp index 0d92b9578166d..597edd0fd521b 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp @@ -10,8 +10,10 @@ // Dereference non-dereferenceable iterator. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -44,13 +46,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/db_move.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_move.pass.cpp new file mode 100644 index 0000000000000..41da7ea4169e2 --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_move.pass.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// unordered_multiset(unordered_multiset&& u); + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) + +#include +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + std::unordered_multiset s1 = {1, 2, 3}; + std::unordered_multiset::iterator i = s1.begin(); + int k = *i; + std::unordered_multiset s2 = std::move(s1); + assert(*i == k); + s2.erase(i); + assert(s2.size() == 2); + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db1.pass.cpp similarity index 84% rename from libcxx/test/std/containers/unord/unord.multiset/erase_iter_db1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db1.pass.cpp index 788b5ac4f63b6..b967c59a676f4 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db1.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator position) with end() -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -29,13 +31,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db2.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.multiset/erase_iter_db2.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db2.pass.cpp index 68828d5f84e81..d704dcaa78063 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db2.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator position) with iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -32,13 +34,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp index 118fadf522154..1a183e6658cab 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with first iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -31,13 +33,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp index ad6b8cddcde1e..de3ebaaac647a 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with second iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -31,13 +33,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp index 5b5bbbae77f8f..9d36b53f51ffd 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with both iterators from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -31,13 +33,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp similarity index 85% rename from libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp index fbeb8e0752012..a68f312b80162 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with a bad range -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -30,13 +32,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp similarity index 89% rename from libcxx/test/std/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp index e1f45e17baff6..2feba5c47f531 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp @@ -14,9 +14,11 @@ // void swap(unordered_multiset& x, unordered_multiset& y); -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) -#endif #include #include @@ -25,7 +27,6 @@ int main(int, char**) { -#if _LIBCPP_DEBUG >= 1 { int a1[] = {1, 3, 7, 9, 10}; int a2[] = {0, 2, 4, 5, 6, 8, 11}; @@ -40,7 +41,6 @@ int main(int, char**) c1.erase(i1); assert(false); } -#endif - return 0; + return 0; } diff --git a/libcxx/test/libcxx/containers/unord/unord.set/db_insert_hint_const_lvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_insert_hint_const_lvalue.pass.cpp new file mode 100644 index 0000000000000..3303d089970f1 --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_insert_hint_const_lvalue.pass.cpp @@ -0,0 +1,39 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// iterator insert(const_iterator p, const value_type& x); + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) + +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + typedef std::unordered_set C; + typedef C::iterator R; + typedef C::value_type P; + C c; + C c2; + C::const_iterator e = c2.end(); + P v(3.5); + R r = c.insert(e, v); + assert(false); + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.set/db_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_7.pass.cpp similarity index 88% rename from libcxx/test/std/containers/unord/unord.set/db_iterators_7.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/db_iterators_7.pass.cpp index 0de979ae27a5d..12e56ea1bce8c 100644 --- a/libcxx/test/std/containers/unord/unord.set/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_7.pass.cpp @@ -10,8 +10,10 @@ // Increment iterator past end. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -48,13 +50,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.set/db_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_8.pass.cpp similarity index 88% rename from libcxx/test/std/containers/unord/unord.set/db_iterators_8.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/db_iterators_8.pass.cpp index b6acf233276fb..3333be8959fdc 100644 --- a/libcxx/test/std/containers/unord/unord.set/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_8.pass.cpp @@ -10,8 +10,10 @@ // Dereference non-dereferenceable iterator. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -44,13 +46,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.set/db_local_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_7.pass.cpp similarity index 88% rename from libcxx/test/std/containers/unord/unord.set/db_local_iterators_7.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_7.pass.cpp index df0f20bcb074a..f003c2b2d763c 100644 --- a/libcxx/test/std/containers/unord/unord.set/db_local_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_7.pass.cpp @@ -10,8 +10,10 @@ // Increment local_iterator past end. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -47,13 +49,3 @@ int main(int, char**) #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.set/db_local_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_8.pass.cpp similarity index 88% rename from libcxx/test/std/containers/unord/unord.set/db_local_iterators_8.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_8.pass.cpp index 71121025192d7..999ec8b1e48be 100644 --- a/libcxx/test/std/containers/unord/unord.set/db_local_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_8.pass.cpp @@ -10,8 +10,10 @@ // Dereference non-dereferenceable iterator. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -45,13 +47,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/containers/unord/unord.set/db_move.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_move.pass.cpp new file mode 100644 index 0000000000000..02f8368805289 --- /dev/null +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_move.pass.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// unordered_set(unordered_set&& u); + +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 +#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) + +#include +#include +#include + +#include "test_macros.h" + +int main(int, char**) +{ + { + std::unordered_set s1 = {1, 2, 3}; + std::unordered_set::iterator i = s1.begin(); + int k = *i; + std::unordered_set s2 = std::move(s1); + assert(*i == k); + s2.erase(i); + assert(s2.size() == 2); + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db1.pass.cpp similarity index 84% rename from libcxx/test/std/containers/unord/unord.set/erase_iter_db1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db1.pass.cpp index 063ed931738fc..b5ddd8ca1b680 100644 --- a/libcxx/test/std/containers/unord/unord.set/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db1.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator position) with end() -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -29,13 +31,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db2.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.set/erase_iter_db2.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db2.pass.cpp index 30dd46f11be07..bd14a3c0e6ca2 100644 --- a/libcxx/test/std/containers/unord/unord.set/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db2.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator position) with iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -32,13 +34,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp index a84b0601e8418..70a1afb9c4a17 100644 --- a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with first iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. 
+// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -31,13 +33,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp index 9fb3bc365803f..88f33d5d03be9 100644 --- a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with second iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -31,13 +33,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp similarity index 86% rename from libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp index 9f056e8373ac5..8aa1b5a9390ad 100644 --- a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with both iterators from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -31,13 +33,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp similarity index 85% rename from libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp index f56297dc74b04..0922c65865d4c 100644 --- a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with a bad range -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -30,13 +32,3 @@ int main(int, char**) assert(false); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp similarity index 88% rename from libcxx/test/std/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp rename to libcxx/test/libcxx/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp index 7682a274690b6..b65cc69c9e72f 100644 --- a/libcxx/test/std/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp @@ -14,9 +14,11 @@ // void swap(unordered_set& x, unordered_set& y); -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) -#endif #include #include @@ -25,7 +27,6 @@ int main(int, char**) { -#if _LIBCPP_DEBUG >= 1 { int a1[] = {1, 3, 7, 9, 10}; int a2[] = {0, 2, 4, 5, 6, 8, 11}; @@ -40,7 +41,6 @@ int main(int, char**) c1.erase(i1); assert(false); } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/strings/basic.string/string.access/db_back.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_back.pass.cpp similarity index 87% rename from libcxx/test/std/strings/basic.string/string.access/db_back.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.access/db_back.pass.cpp index 548bf01087569..31000d092799c 100644 --- a/libcxx/test/std/strings/basic.string/string.access/db_back.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_back.pass.cpp @@ -10,8 +10,10 @@ // Call back() on empty container. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -44,13 +46,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.access/db_cback.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_cback.pass.cpp similarity index 87% rename from libcxx/test/std/strings/basic.string/string.access/db_cback.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.access/db_cback.pass.cpp index 47cda4983bf63..61d504f9bf3e9 100644 --- a/libcxx/test/std/strings/basic.string/string.access/db_cback.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_cback.pass.cpp @@ -10,8 +10,10 @@ // Call back() on empty const container. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -40,13 +42,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.access/db_cfront.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_cfront.pass.cpp similarity index 87% rename from libcxx/test/std/strings/basic.string/string.access/db_cfront.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.access/db_cfront.pass.cpp index 12b5e511b241d..654c575d5b263 100644 --- a/libcxx/test/std/strings/basic.string/string.access/db_cfront.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_cfront.pass.cpp @@ -10,8 +10,10 @@ // Call front() on empty const container. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -40,13 +42,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.access/db_cindex.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_cindex.pass.cpp similarity index 87% rename from libcxx/test/std/strings/basic.string/string.access/db_cindex.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.access/db_cindex.pass.cpp index a369cb6234f92..2a5267eef875c 100644 --- a/libcxx/test/std/strings/basic.string/string.access/db_cindex.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_cindex.pass.cpp @@ -10,8 +10,10 @@ // Index const string out of bounds. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -42,13 +44,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.access/db_front.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_front.pass.cpp similarity index 87% rename from libcxx/test/std/strings/basic.string/string.access/db_front.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.access/db_front.pass.cpp index 6f8dcd5df7c99..c73c536df6c03 100644 --- a/libcxx/test/std/strings/basic.string/string.access/db_front.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_front.pass.cpp @@ -10,8 +10,10 @@ // Call front() on empty container. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -44,13 +46,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.access/db_index.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_index.pass.cpp similarity index 87% rename from libcxx/test/std/strings/basic.string/string.access/db_index.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.access/db_index.pass.cpp index bc2502316dab7..ef250b01b3c82 100644 --- a/libcxx/test/std/strings/basic.string/string.access/db_index.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_index.pass.cpp @@ -10,8 +10,10 @@ // Index string out of bounds. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -42,13 +44,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_2.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_2.pass.cpp similarity index 87% rename from libcxx/test/std/strings/basic.string/string.iterators/db_iterators_2.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_2.pass.cpp index 41323e0458d45..df165b70f5d74 100644 --- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_2.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_2.pass.cpp @@ -10,8 +10,10 @@ // Compare iterators from different containers with <. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -44,13 +46,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_3.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_3.pass.cpp similarity index 87% rename from libcxx/test/std/strings/basic.string/string.iterators/db_iterators_3.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_3.pass.cpp index ea0f2866310c9..9f5146689f659 100644 --- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_3.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_3.pass.cpp @@ -10,8 +10,10 @@ // Subtract iterators from different containers with <. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -44,13 +46,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_4.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_4.pass.cpp similarity index 88% rename from libcxx/test/std/strings/basic.string/string.iterators/db_iterators_4.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_4.pass.cpp index 07b1b825c65a8..28aa876ce537b 100644 --- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_4.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_4.pass.cpp @@ -10,8 +10,10 @@ // Index iterator out of bounds. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -44,13 +46,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_5.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_5.pass.cpp similarity index 88% rename from libcxx/test/std/strings/basic.string/string.iterators/db_iterators_5.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_5.pass.cpp index a183282f40909..9fd4dec8aa0c7 100644 --- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_5.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_5.pass.cpp @@ -10,8 +10,10 @@ // Add to iterator out of bounds. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -48,13 +50,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_6.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_6.pass.cpp similarity index 88% rename from libcxx/test/std/strings/basic.string/string.iterators/db_iterators_6.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_6.pass.cpp index a24c370dea226..802d6b4d986a7 100644 --- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_6.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_6.pass.cpp @@ -10,8 +10,10 @@ // Decrement iterator prior to begin. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -46,13 +48,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_7.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_7.pass.cpp similarity index 88% rename from libcxx/test/std/strings/basic.string/string.iterators/db_iterators_7.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_7.pass.cpp index 16a20878658d5..86b175f2e4f1c 100644 --- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_7.pass.cpp @@ -10,8 +10,10 @@ // Increment iterator past end. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -46,13 +48,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_8.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_8.pass.cpp similarity index 87% rename from libcxx/test/std/strings/basic.string/string.iterators/db_iterators_8.pass.cpp rename to libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_8.pass.cpp index 2275020b4d73b..ca3521de189d5 100644 --- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_8.pass.cpp @@ -10,8 +10,10 @@ // Dereference non-dereferenceable iterator. -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -42,13 +44,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp index 6da77e4365b6f..5369a8238f9ab 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp @@ -10,8 +10,10 @@ // Call __clear_and_shrink() and ensure string invariants hold -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -39,13 +41,3 @@ int main(int, char**) assert(l.capacity() < cap); } } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db1.pass.cpp index a5258f95d4d2e..f9dd19c2ce7c9 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db1.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator position) with end() -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -40,13 +42,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db2.pass.cpp index 099ce74f74325..3e1b5fc952821 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db2.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator position) with iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -42,13 +44,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db1.pass.cpp index 1802f6287db29..ce0690f93ca8c 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db1.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with first iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -40,13 +42,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db2.pass.cpp index fe65851ef7756..87e2f50389f55 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db2.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with second iterator from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) #include @@ -40,13 +42,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db3.pass.cpp index fad146451262e..848f34447f0fa 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db3.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with both iterators from another container -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -40,13 +42,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db4.pass.cpp index 3186ad4492b02..cb87f1f3769c4 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db4.pass.cpp @@ -10,8 +10,10 @@ // Call erase(const_iterator first, const_iterator last); with a bad range -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) #include @@ -38,13 +40,3 @@ int main(int, char**) } #endif } - -#else - -int main(int, char**) -{ - - return 0; -} - -#endif diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_pop_back_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_pop_back_db1.pass.cpp index 2516a69f51f21..af93f57aae061 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_pop_back_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_pop_back_db1.pass.cpp @@ -10,9 +10,11 @@ // void pop_back(); -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) -#endif #include #include @@ -21,13 +23,11 @@ int main(int, char**) { -#if _LIBCPP_DEBUG >= 1 { std::string s; s.pop_back(); assert(false); } -#endif - return 0; + return 0; } diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp index eb07fe88a55fe..e5814888d9fa7 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp @@ -10,9 +10,11 @@ // iterator insert(const_iterator p, charT c); -#if _LIBCPP_DEBUG >= 1 +// This test requires debug mode, which the library on macOS doesn't have. +// UNSUPPORTED: with_system_cxx_lib=macosx + +#define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0))
-#endif

 #include <string>
 #include <cassert>

@@ -23,7 +25,6 @@

 int main(int, char**)
 {
-#if _LIBCPP_DEBUG >= 1
     {
         typedef std::string S;
         S s;
@@ -31,7 +32,6 @@ int main(int, char**)
         s.insert(s2.begin(), '1');
         assert(false);
     }
-#endif

-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_iter_iter_db1.pass.cpp
new file mode 100644
index 0000000000000..5bbe1468bf35f
--- /dev/null
+++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_iter_iter_db1.pass.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <string>
+
+// template <class InputIterator>
+//   iterator insert(const_iterator p, InputIterator first, InputIterator last);
+
+// This test requires debug mode, which the library on macOS doesn't have.
+// UNSUPPORTED: with_system_cxx_lib=macosx
+
+#define _LIBCPP_DEBUG 1
+#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))
+
+#include <string>
+#include <cassert>
+
+#include "test_macros.h"
+
+
+int main(int, char**)
+{
+    {
+        std::string v;
+        std::string v2;
+        char a[] = "123";
+        const int N = sizeof(a)/sizeof(a[0]);
+        std::string::iterator i = v.insert(v2.cbegin() + 10, a, a+N);
+        assert(false);
+    }
+
+    return 0;
+}
diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp
index 4db97609ff51e..2be5b082ed3d5 100644
--- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp
+++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp
@@ -10,9 +10,11 @@

 // iterator insert(const_iterator p, size_type n, charT c);

-#if _LIBCPP_DEBUG >= 1
+// This test requires debug mode, which the library on macOS doesn't have.
+// UNSUPPORTED: with_system_cxx_lib=macosx
+
+#define _LIBCPP_DEBUG 1
 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))
-#endif

 #include <string>
 #include <cassert>

@@ -21,14 +23,12 @@

 int main(int, char**)
 {
-#if _LIBCPP_DEBUG >= 1
     {
         std::string s;
         std::string s2;
         s.insert(s2.begin(), 1, 'a');
         assert(false);
     }
-#endif

-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/containers/unord/unord.map/bucket.pass.cpp b/libcxx/test/std/containers/unord/unord.map/bucket.pass.cpp
index 47bc933c70a28..93c177a7df45b 100644
--- a/libcxx/test/std/containers/unord/unord.map/bucket.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/bucket.pass.cpp
@@ -14,10 +14,6 @@

 // size_type bucket(const key_type& __k) const;

-#ifdef _LIBCPP_DEBUG
-#define _LIBCPP_ASSERT(x, m) ((x) ?
(void)0 : std::exit(0)) -#endif - #include #include #include @@ -66,14 +62,6 @@ int main(int, char**) LIBCPP_ASSERT(c.bucket(i) == i % bc); } #endif -#if _LIBCPP_DEBUG_LEVEL >= 1 - { - typedef std::unordered_map C; - C c; - (void) c.bucket(3); - assert(false); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.pass.cpp index 1be50c4e88df4..6efaa63eeb4ef 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.pass.cpp @@ -165,17 +165,6 @@ int main(int, char**) assert(c0.empty()); } -#if _LIBCPP_DEBUG >= 1 - { - std::unordered_map s1 = {{1, 1}, {2, 2}, {3, 3}}; - std::unordered_map::iterator i = s1.begin(); - std::pair k = *i; - std::unordered_map s2 = std::move(s1); - assert(*i == k); - s2.erase(i); - assert(s2.size() == 2); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.pass.cpp index 10032c090673c..cf18daf767be2 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.pass.cpp @@ -14,10 +14,6 @@ // iterator insert(const_iterator p, const value_type& x); -#if _LIBCPP_DEBUG >= 1 -#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) -#endif - #include #include @@ -68,19 +64,6 @@ int main(int, char**) do_insert_hint_const_lvalue_test(); } #endif -#if _LIBCPP_DEBUG >= 1 - { - typedef std::unordered_map C; - typedef C::iterator R; - typedef C::value_type P; - C c; - C c2; - C::const_iterator e = c2.end(); - P v(3.5, 3); - R r = c.insert(e, v); - assert(false); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_rvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_rvalue.pass.cpp index 013e5b3efc7c3..48293d065d227 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_rvalue.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_rvalue.pass.cpp @@ -18,10 +18,6 @@ // class = typename enable_if::value>::type> // iterator insert(const_iterator p, P&& x); -#if _LIBCPP_DEBUG >= 1 -#define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) -#endif - #include #include @@ -162,18 +158,6 @@ int main(int, char**) assert(r->first == 5.5); assert(r->second == 4); } -#if _LIBCPP_DEBUG >= 1 - { - typedef std::unordered_map C; - typedef C::iterator R; - typedef C::value_type P; - C c; - C c2; - C::const_iterator e = c2.end(); - R r = c.insert(e, P(3.5, 3)); - assert(false); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.pass.cpp index 047f7e6d2dd2d..a0a707c760028 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.pass.cpp @@ -202,17 +202,6 @@ int main(int, char**) assert(c0.empty()); } -#if _LIBCPP_DEBUG >= 1 - { - std::unordered_multimap s1 = {{1, 1}, {2, 2}, {3, 3}}; - std::unordered_multimap::iterator i = s1.begin(); - std::pair k = *i; - std::unordered_multimap s2 = std::move(s1); - assert(*i == k); - s2.erase(i); - assert(s2.size() == 2); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_const_lvalue.pass.cpp index ee7b3374ad1aa..cb79ddff41f78 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_const_lvalue.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_const_lvalue.pass.cpp @@ -14,10 +14,6 @@ // iterator insert(const_iterator p, const value_type& x); -#if _LIBCPP_DEBUG >= 1 -#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) -#endif - #include #include @@ -67,19 +63,6 @@ int main(int, char**) do_insert_const_lvalue_test(); } #endif -#if _LIBCPP_DEBUG >= 1 - { - typedef std::unordered_multimap C; - typedef C::iterator R; - typedef C::value_type P; - C c; - C c2; - C::const_iterator e = c2.end(); - P v(3.5, 3); - R r = c.insert(e, v); - assert(false); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_rvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_rvalue.pass.cpp index d5d472fa3c6f6..6fa3b45fab779 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_rvalue.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_rvalue.pass.cpp @@ -18,10 +18,6 @@ // class = typename enable_if::value>::type> // iterator insert(const_iterator p, P&& x); -#if _LIBCPP_DEBUG >= 1 -#define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) -#endif - #include #include @@ -162,18 +158,6 @@ int main(int, char**) assert(r->first == 5.5); assert(r->second == 4); } -#if _LIBCPP_DEBUG >= 1 - { - typedef std::unordered_multimap C; - typedef C::iterator R; - typedef C::value_type P; - C c; - C c2; - C::const_iterator e = c2.end(); - R r = c.insert(e, P(3.5, 3)); - assert(false); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/containers/unord/unord.multiset/insert_hint_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/insert_hint_const_lvalue.pass.cpp index 981481b6cad79..461428e56eb0c 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/insert_hint_const_lvalue.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/insert_hint_const_lvalue.pass.cpp @@ -14,10 +14,6 @@ // iterator insert(const_iterator p, const value_type& x); -#if _LIBCPP_DEBUG >= 1 -#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) -#endif - #include #include @@ -62,19 +58,6 @@ int main(int, char**) do_insert_hint_const_lvalue_test(); } #endif -#if _LIBCPP_DEBUG >= 1 - { - typedef std::unordered_multiset C; - typedef C::iterator R; - typedef C::value_type P; - C c; - C c2; - C::const_iterator e = c2.end(); - P v(3.5); - R r = c.insert(e, v); - assert(false); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.pass.cpp index e480c7db68e1c..fbb4f5fbe11a9 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.pass.cpp @@ -160,17 +160,6 @@ int main(int, char**) assert(c0.empty()); } -#if _LIBCPP_DEBUG >= 1 - { - std::unordered_multiset s1 = {1, 2, 3}; - std::unordered_multiset::iterator i = s1.begin(); - int k = *i; - std::unordered_multiset s2 = std::move(s1); - assert(*i == k); - s2.erase(i); - assert(s2.size() == 2); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.pass.cpp index 0f7c8812e9f13..0532b6813f385 100644 --- a/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.pass.cpp @@ -14,10 +14,6 @@ // iterator insert(const_iterator p, const value_type& x); -#if _LIBCPP_DEBUG >= 1 -#define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) -#endif - #include #include @@ -62,19 +58,6 @@ int main(int, char**) do_insert_hint_const_lvalue_test(); } #endif -#if _LIBCPP_DEBUG >= 1 - { - typedef std::unordered_set C; - typedef C::iterator R; - typedef C::value_type P; - C c; - C c2; - C::const_iterator e = c2.end(); - P v(3.5); - R r = c.insert(e, v); - assert(false); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.pass.cpp index 8e06551a67efd..2fc64edd3918f 100644 --- a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.pass.cpp @@ -160,17 +160,6 @@ int main(int, char**) assert(c0.empty()); } -#if _LIBCPP_DEBUG >= 1 - { - std::unordered_set s1 = {1, 2, 3}; - std::unordered_set::iterator i = s1.begin(); - int k = *i; - std::unordered_set s2 = std::move(s1); - assert(*i == k); - s2.erase(i); - assert(s2.size() == 2); - } -#endif - return 0; + return 0; } diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp index d52a579349df0..67ba33debf7d0 100644 --- a/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp @@ -11,10 +11,6 @@ // template // iterator insert(const_iterator p, InputIterator first, InputIterator last); -#if _LIBCPP_DEBUG >= 1 -#define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) -#endif - #include #include @@ -159,16 +155,6 @@ int main(int, char**) test_exceptions(S(), 0, TIter(s, s+10, 6, TIter::TAComparison), TIter()); } #endif -#if _LIBCPP_DEBUG >= 1 - { - std::string v; - std::string v2; - char a[] = "123"; - const int N = sizeof(a)/sizeof(a[0]); - std::string::iterator i = v.insert(v2.cbegin() + 10, a, a+N); - assert(false); - } -#endif { // test inserting into self typedef std::string S; From 3f1fd59de3002e3d5a4eca98cd49c45755ab0110 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Mon, 5 Oct 2020 22:16:59 +0200 Subject: [PATCH 068/321] [SLC] Optimize mempcpy_chk to mempcpy As reported in PR46735: void* f(void *d, const void *s, size_t l) { return __builtin___mempcpy_chk(d, s, l, __builtin_object_size(d, 0)); } This can be optimized to `return mempcpy(d, s, l);`. 
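For clarity, here is that example as a formatted, compilable sketch. The
wrapper name `g` and the includes are illustrative assumptions, not part of
the patch; `mempcpy` itself is a GNU extension (available on glibc) that
returns `(char *)d + l`. The fold is sound because `__builtin_object_size(d, 0)`
on an opaque pointer evaluates to `(size_t)-1`, so the fortified bounds check
can never fail:

    #include <cstddef>
    #include <cstring> // assumed: glibc declares mempcpy (GNU extension)

    // Before: fortified call whose object size is unknown, i.e. (size_t)-1.
    void *f(void *d, const void *s, std::size_t l) {
      return __builtin___mempcpy_chk(d, s, l, __builtin_object_size(d, 0));
    }

    // After the fold: the plain call, which returns (char *)d + l.
    // `g` is a hypothetical name for the transformed function.
    void *g(void *d, const void *s, std::size_t l) {
      return mempcpy(d, s, l);
    }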
Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D86019 --- .../llvm/Analysis/TargetLibraryInfo.def | 3 +++ .../llvm/Transforms/Utils/BuildLibCalls.h | 4 ++++ .../llvm/Transforms/Utils/SimplifyLibCalls.h | 1 + llvm/lib/Analysis/TargetLibraryInfo.cpp | 1 + llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 9 ++++++++ .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 15 ++++++++++++ .../Transforms/InstCombine/fortify-folding.ll | 23 +++++++++++++++++++ 7 files changed, 56 insertions(+) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 36b39f4a0e231..7501d1a304e08 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -366,6 +366,9 @@ TLI_DEFINE_STRING_INTERNAL("__memcpy_chk") /// void *__memmove_chk(void *s1, const void *s2, size_t n, size_t s1size); TLI_DEFINE_ENUM_INTERNAL(memmove_chk) TLI_DEFINE_STRING_INTERNAL("__memmove_chk") +/// void *__mempcpy_chk(void *s1, const void *s2, size_t n, size_t s1size); +TLI_DEFINE_ENUM_INTERNAL(mempcpy_chk) +TLI_DEFINE_STRING_INTERNAL("__mempcpy_chk") /// void *__memset_chk(void *s, char v, size_t n, size_t s1size); TLI_DEFINE_ENUM_INTERNAL(memset_chk) TLI_DEFINE_STRING_INTERNAL("__memset_chk") diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h index 90517e806e02c..e7d41933a6c9f 100644 --- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -96,6 +96,10 @@ namespace llvm { IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI); + /// Emit a call to the mempcpy function. + Value *emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI); + /// Emit a call to the memchr function. This assumes that Ptr is a pointer, /// Val is an i32 value, and Len is an 'intptr_t' value. 
Value *emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 2819a3468766d..8703434e16967 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -59,6 +59,7 @@ class FortifiedLibCallSimplifier {
   Value *optimizeStrpCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func);
   Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func);
   Value *optimizeStrLenChk(CallInst *CI, IRBuilderBase &B);
+  Value *optimizeMemPCpyChk(CallInst *CI, IRBuilderBase &B);
   Value *optimizeMemCCpyChk(CallInst *CI, IRBuilderBase &B);
   Value *optimizeSNPrintfChk(CallInst *CI, IRBuilderBase &B);
   Value *optimizeSPrintfChk(CallInst *CI,IRBuilderBase &B);
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index e629d04e5deec..d9b263b3967c4 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -847,6 +847,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
     return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());

   case LibFunc_memcpy_chk:
+  case LibFunc_mempcpy_chk:
   case LibFunc_memmove_chk:
     --NumParams;
     if (!IsSizeTTy(FTy.getParamType(NumParams)))
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 2a0cdf6176109..86e9b48826e3d 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1076,6 +1076,15 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
   return CI;
 }

+Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
+                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  return emitLibCall(
+      LibFunc_mempcpy, B.getInt8PtrTy(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {Dst, Src, Len}, B, TLI);
+}
+
 Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
   LLVMContext &Context = B.GetInsertBlock()->getContext();
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index bcda3f3440a34..a904d2550562e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -3292,6 +3292,19 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
   return nullptr;
 }

+Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
+                                                      IRBuilderBase &B) {
+  const DataLayout &DL = CI->getModule()->getDataLayout();
+  if (isFortifiedCallFoldable(CI, 3, 2))
+    if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                                  CI->getArgOperand(2), B, DL, TLI)) {
+      CallInst *NewCI = cast<CallInst>(Call);
+      NewCI->setAttributes(CI->getAttributes());
+      return NewCI;
+    }
+  return nullptr;
+}
+
 Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
                                                       IRBuilderBase &B,
                                                       LibFunc Func) {
@@ -3481,6 +3494,8 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
   switch (Func) {
   case LibFunc_memcpy_chk:
     return optimizeMemCpyChk(CI, Builder);
+  case LibFunc_mempcpy_chk:
+    return optimizeMemPCpyChk(CI, Builder);
   case LibFunc_memmove_chk:
     return optimizeMemMoveChk(CI, Builder);
   case LibFunc_memset_chk:
diff --git
a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll index 2602640595e65..ea29ecc31d114 100644 --- a/llvm/test/Transforms/InstCombine/fortify-folding.ll +++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll @@ -31,6 +31,28 @@ define i8* @test_not_memccpy() { ret i8* %ret } +define i8* @test_mempcpy() { +; CHECK-LABEL: @test_mempcpy( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 15) +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 15, i64 -1) + ret i8* %ret +} + +define i8* @test_not_mempcpy() { +; CHECK-LABEL: @test_not_mempcpy( +; CHECK-NEXT: [[RET:%.*]] = call i8* @__mempcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 60, i64 59) +; CHECK-NEXT: ret i8* [[RET]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 60, i64 59) + ret i8* %ret +} + define i32 @test_snprintf() { ; CHECK-LABEL: @test_snprintf( ; CHECK-NEXT: [[SNPRINTF:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) @@ -219,6 +241,7 @@ define i32 @test_not_vsprintf() { ret i32 %ret } +declare i8* @__mempcpy_chk(i8*, i8*, i64, i64) declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64) declare i32 @__snprintf_chk(i8*, i64, i32, i64, i8*, ...) declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...) From a4bae56ab8e9474c2bdb2640243ae7ea6a3e0619 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Mon, 5 Oct 2020 22:27:14 +0200 Subject: [PATCH 069/321] Revert "[SLC] Optimize mempcpy_chk to mempcpy" This reverts commit 3f1fd59de3002e3d5a4eca98cd49c45755ab0110. 
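For context: the fold being reverted rewrote `__mempcpy_chk(dst, src, n, objsize)`
into a plain `mempcpy` call whenever the object-size argument proved the copy
in-bounds (including the "unknown size" sentinel `-1`, as in the `test_mempcpy`
case removed below). A rough C-level sketch of the semantics involved, following
the glibc-style prototype quoted in the TLI comment (the abort-on-overflow
behavior is the usual fortify convention, not something defined by this patch):

```
#include <stddef.h>

/* Like mempcpy (copy n bytes, return s1 + n), but the fortified runtime
 * is expected to abort when n > s1size; a compiler may therefore fold it
 * to plain mempcpy whenever n <= s1size is provable. */
void *__mempcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
```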
--- .../llvm/Analysis/TargetLibraryInfo.def | 3 --- .../llvm/Transforms/Utils/BuildLibCalls.h | 4 ---- .../llvm/Transforms/Utils/SimplifyLibCalls.h | 1 - llvm/lib/Analysis/TargetLibraryInfo.cpp | 1 - llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 9 -------- .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 15 ------------ .../Transforms/InstCombine/fortify-folding.ll | 23 ------------------- 7 files changed, 56 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 7501d1a304e08..36b39f4a0e231 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -366,9 +366,6 @@ TLI_DEFINE_STRING_INTERNAL("__memcpy_chk") /// void *__memmove_chk(void *s1, const void *s2, size_t n, size_t s1size); TLI_DEFINE_ENUM_INTERNAL(memmove_chk) TLI_DEFINE_STRING_INTERNAL("__memmove_chk") -/// void *__mempcpy_chk(void *s1, const void *s2, size_t n, size_t s1size); -TLI_DEFINE_ENUM_INTERNAL(mempcpy_chk) -TLI_DEFINE_STRING_INTERNAL("__mempcpy_chk") /// void *__memset_chk(void *s, char v, size_t n, size_t s1size); TLI_DEFINE_ENUM_INTERNAL(memset_chk) TLI_DEFINE_STRING_INTERNAL("__memset_chk") diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h index e7d41933a6c9f..90517e806e02c 100644 --- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -96,10 +96,6 @@ namespace llvm { IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI); - /// Emit a call to the mempcpy function. - Value *emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, - const DataLayout &DL, const TargetLibraryInfo *TLI); - /// Emit a call to the memchr function. This assumes that Ptr is a pointer, /// Val is an i32 value, and Len is an 'intptr_t' value. 
Value *emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 8703434e16967..2819a3468766d 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -59,7 +59,6 @@ class FortifiedLibCallSimplifier { Value *optimizeStrpCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func); Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func); Value *optimizeStrLenChk(CallInst *CI, IRBuilderBase &B); - Value *optimizeMemPCpyChk(CallInst *CI, IRBuilderBase &B); Value *optimizeMemCCpyChk(CallInst *CI, IRBuilderBase &B); Value *optimizeSNPrintfChk(CallInst *CI, IRBuilderBase &B); Value *optimizeSPrintfChk(CallInst *CI,IRBuilderBase &B); diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index d9b263b3967c4..e629d04e5deec 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -847,7 +847,6 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy()); case LibFunc_memcpy_chk: - case LibFunc_mempcpy_chk: case LibFunc_memmove_chk: --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 86e9b48826e3d..2a0cdf6176109 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1076,15 +1076,6 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, return CI; } -Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, - const DataLayout &DL, const TargetLibraryInfo *TLI) { - LLVMContext &Context = B.GetInsertBlock()->getContext(); - return emitLibCall( - LibFunc_mempcpy, B.getInt8PtrTy(), - {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)}, - {Dst, Src, Len}, B, TLI); -} - Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { LLVMContext &Context = B.GetInsertBlock()->getContext(); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index a904d2550562e..bcda3f3440a34 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -3292,19 +3292,6 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, return nullptr; } -Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI, - IRBuilderBase &B) { - const DataLayout &DL = CI->getModule()->getDataLayout(); - if (isFortifiedCallFoldable(CI, 3, 2)) - if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, DL, TLI)) { - CallInst *NewCI = cast(Call); - NewCI->setAttributes(CI->getAttributes()); - return NewCI; - } - return nullptr; -} - Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func) { @@ -3494,8 +3481,6 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI, switch (Func) { case LibFunc_memcpy_chk: return optimizeMemCpyChk(CI, Builder); - case LibFunc_mempcpy_chk: - return optimizeMemPCpyChk(CI, Builder); case LibFunc_memmove_chk: return optimizeMemMoveChk(CI, Builder); case LibFunc_memset_chk: diff --git 
a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll index ea29ecc31d114..2602640595e65 100644 --- a/llvm/test/Transforms/InstCombine/fortify-folding.ll +++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll @@ -31,28 +31,6 @@ define i8* @test_not_memccpy() { ret i8* %ret } -define i8* @test_mempcpy() { -; CHECK-LABEL: @test_mempcpy( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false) -; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 15) -; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 15, i64 -1) - ret i8* %ret -} - -define i8* @test_not_mempcpy() { -; CHECK-LABEL: @test_not_mempcpy( -; CHECK-NEXT: [[RET:%.*]] = call i8* @__mempcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 60, i64 59) -; CHECK-NEXT: ret i8* [[RET]] -; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 60, i64 59) - ret i8* %ret -} - define i32 @test_snprintf() { ; CHECK-LABEL: @test_snprintf( ; CHECK-NEXT: [[SNPRINTF:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) @@ -241,7 +219,6 @@ define i32 @test_not_vsprintf() { ret i32 %ret } -declare i8* @__mempcpy_chk(i8*, i8*, i64, i64) declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64) declare i32 @__snprintf_chk(i8*, i64, i32, i64, i8*, ...) declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...) From 32a2209b5c72571a2448bf0ffe88e96eb91b4784 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 5 Oct 2020 16:24:41 -0400 Subject: [PATCH 070/321] [libc++] NFC: Remove unused include in atomic.cpp --- libcxx/src/atomic.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp index 65d4837bb4a73..6b73ed771cd1b 100644 --- a/libcxx/src/atomic.cpp +++ b/libcxx/src/atomic.cpp @@ -13,8 +13,6 @@ #include #include -#include - #ifdef __linux__ #include From fe7245b772d5bca4a5f6cc055b18c45cc8b46902 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 5 Oct 2020 16:30:23 -0400 Subject: [PATCH 071/321] [libc++] NFC: Rename variant helpers to avoid name clashes Some system headers define __constructor and __destructor macros (for Clang attributes constructor and destructor). While this is badly behaved, it is easy for libc++ to work around this issue. 
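For illustration, a minimal sketch of the clash (hypothetical macros; not taken
from any specific platform's headers):

```
// A badly behaved system header may spell the Clang attributes as macros:
#define __constructor __attribute__((__constructor__))
#define __destructor  __attribute__((__destructor__))

// After preprocessing, any libc++-internal entity named __constructor or
// __destructor (such as the variant helper class templates renamed in
// this patch) no longer parses, so libc++ avoids those identifiers.
```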
--- libcxx/include/variant | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/libcxx/include/variant b/libcxx/include/variant index 33d5dc7dbeb12..e6da096f6c382 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -723,12 +723,12 @@ protected: }; template -class _LIBCPP_TEMPLATE_VIS __destructor; +class _LIBCPP_TEMPLATE_VIS __dtor; #define _LIBCPP_VARIANT_DESTRUCTOR(destructible_trait, destructor, destroy) \ template \ - class _LIBCPP_TEMPLATE_VIS __destructor<__traits<_Types...>, \ - destructible_trait> \ + class _LIBCPP_TEMPLATE_VIS __dtor<__traits<_Types...>, \ + destructible_trait> \ : public __base { \ using __base_type = __base; \ using __index_t = typename __base_type::__index_t; \ @@ -737,11 +737,11 @@ class _LIBCPP_TEMPLATE_VIS __destructor; using __base_type::__base_type; \ using __base_type::operator=; \ \ - __destructor(const __destructor&) = default; \ - __destructor(__destructor&&) = default; \ + __dtor(const __dtor&) = default; \ + __dtor(__dtor&&) = default; \ destructor \ - __destructor& operator=(const __destructor&) = default; \ - __destructor& operator=(__destructor&&) = default; \ + __dtor& operator=(const __dtor&) = default; \ + __dtor& operator=(__dtor&&) = default; \ \ protected: \ inline _LIBCPP_INLINE_VISIBILITY \ @@ -750,12 +750,12 @@ class _LIBCPP_TEMPLATE_VIS __destructor; _LIBCPP_VARIANT_DESTRUCTOR( _Trait::_TriviallyAvailable, - ~__destructor() = default;, + ~__dtor() = default;, void __destroy() noexcept { this->__index = __variant_npos<__index_t>; }); _LIBCPP_VARIANT_DESTRUCTOR( _Trait::_Available, - ~__destructor() { __destroy(); }, + ~__dtor() { __destroy(); }, void __destroy() noexcept { if (!this->valueless_by_exception()) { __visitation::__base::__visit_alt( @@ -770,14 +770,14 @@ _LIBCPP_VARIANT_DESTRUCTOR( _LIBCPP_VARIANT_DESTRUCTOR( _Trait::_Unavailable, - ~__destructor() = delete;, + ~__dtor() = delete;, void __destroy() noexcept = delete;); #undef _LIBCPP_VARIANT_DESTRUCTOR template -class _LIBCPP_TEMPLATE_VIS __constructor : public __destructor<_Traits> { - using __base_type = __destructor<_Traits>; +class _LIBCPP_TEMPLATE_VIS __ctor : public __dtor<_Traits> { + using __base_type = __dtor<_Traits>; public: using __base_type::__base_type; @@ -794,7 +794,7 @@ protected: template inline _LIBCPP_INLINE_VISIBILITY - static void __generic_construct(__constructor& __lhs, _Rhs&& __rhs) { + static void __generic_construct(__ctor& __lhs, _Rhs&& __rhs) { __lhs.__destroy(); if (!__rhs.valueless_by_exception()) { __visitation::__base::__visit_alt_at( @@ -816,10 +816,10 @@ class _LIBCPP_TEMPLATE_VIS __move_constructor; #define _LIBCPP_VARIANT_MOVE_CONSTRUCTOR(move_constructible_trait, \ move_constructor) \ template \ - class _LIBCPP_TEMPLATE_VIS __move_constructor<__traits<_Types...>, \ - move_constructible_trait> \ - : public __constructor<__traits<_Types...>> { \ - using __base_type = __constructor<__traits<_Types...>>; \ + class _LIBCPP_TEMPLATE_VIS __move_constructor<__traits<_Types...>, \ + move_constructible_trait> \ + : public __ctor<__traits<_Types...>> { \ + using __base_type = __ctor<__traits<_Types...>>; \ \ public: \ using __base_type::__base_type; \ From 477a68760b24f07a45253fb41e89368328b3a4a8 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 5 Oct 2020 16:39:33 -0400 Subject: [PATCH 072/321] [libc++] Use __has_include instead of complex logic in thread.cpp We might end up including more headers than strictly necessary this way, but it's much simpler and it makes it easier to 
port thread.cpp to systems not handled by the existing conditionals.
---
 libcxx/src/thread.cpp | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/libcxx/src/thread.cpp b/libcxx/src/thread.cpp
index 5f44e9e40fc7b..e1dc972cba7f9 100644
--- a/libcxx/src/thread.cpp
+++ b/libcxx/src/thread.cpp
@@ -14,17 +14,21 @@
 #include "vector"
 #include "future"
 #include "limits"
-#include <sys/types.h>
 
-#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
+#if __has_include(<sys/types.h>)
+# include <sys/types.h>
+#endif
+
+#if __has_include(<sys/param.h>)
 # include <sys/param.h>
-# if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__APPLE__)
-#  include <sys/sysctl.h>
-# endif
-#endif // defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
+#endif
+
+#if __has_include(<sys/sysctl.h>)
+# include <sys/sysctl.h>
+#endif
 
 #if __has_include(<unistd.h>)
-#include <unistd.h>
+# include <unistd.h>
 #endif
 
 #if defined(__NetBSD__)

From ed88d962953c52c76d568d90fe2d5546ea6ab543 Mon Sep 17 00:00:00 2001
From: Evandro Menezes
Date: Fri, 2 Oct 2020 16:26:49 -0500
Subject: [PATCH 073/321] [RISCV] Use the extensions in the canonical order
 (NFC)

Fix a mistake in the ordering.
---
 llvm/lib/Target/RISCV/RISCV.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 66eda3ba360cf..240dad1ed5cae 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -237,8 +237,8 @@ def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtM,
 
 def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit,
                                                  FeatureStdExtM,
-                                                 FeatureStdExtF,
                                                  FeatureStdExtA,
+                                                 FeatureStdExtF,
                                                  FeatureStdExtD,
                                                  FeatureStdExtC]>;

From 5d6d8a2769b3a91fd65b125c2cda64ea27a894bf Mon Sep 17 00:00:00 2001
From: Evandro Menezes
Date: Fri, 2 Oct 2020 16:44:32 -0500
Subject: [PATCH 074/321] [RISCV] Add SiFive cores to the CPU option

Add the SiFive cores E76 and U74 using the SiFive 7 series
microarchitecture.
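For example, the new cores can be selected through the driver, mirroring the
`riscv-cpus.c` tests below: `clang -target riscv64 -mcpu=sifive-u74 -mabi=lp64 -c foo.c`
(`foo.c` being a placeholder input file).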
Differential Revision: https://reviews.llvm.org/D88759 --- clang/test/Driver/riscv-cpus.c | 24 +++++++++++++++---- clang/test/Misc/target-invalid-cpu-note.c | 4 ++-- .../llvm/Support/RISCVTargetParser.def | 6 +++-- llvm/lib/Target/RISCV/RISCV.td | 16 +++++++++++-- 4 files changed, 39 insertions(+), 11 deletions(-) diff --git a/clang/test/Driver/riscv-cpus.c b/clang/test/Driver/riscv-cpus.c index 15cd212e4fb40..2bd0b26f3caf1 100644 --- a/clang/test/Driver/riscv-cpus.c +++ b/clang/test/Driver/riscv-cpus.c @@ -7,12 +7,12 @@ // MCPU-ROCKET64: "-nostdsysteminc" "-target-cpu" "rocket-rv64" // MCPU-ROCKET64: "-target-feature" "+64bit" -// RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=bullet-rv32 | FileCheck -check-prefix=MCPU-BULLET32 %s -// MCPU-BULLET32: "-nostdsysteminc" "-target-cpu" "bullet-rv32" +// RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=sifive-7-rv32 | FileCheck -check-prefix=MCPU-SIFIVE7-32 %s +// MCPU-SIFIVE7-32: "-nostdsysteminc" "-target-cpu" "sifive-7-rv32" -// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=bullet-rv64 | FileCheck -check-prefix=MCPU-BULLET64 %s -// MCPU-BULLET64: "-nostdsysteminc" "-target-cpu" "bullet-rv64" -// MCPU-BULLET64: "-target-feature" "+64bit" +// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=sifive-7-rv64 | FileCheck -check-prefix=MCPU-SIFIVE7-64 %s +// MCPU-SIFIVE7-64: "-nostdsysteminc" "-target-cpu" "sifive-7-rv64" +// MCPU-SIFIVE7-64: "-target-feature" "+64bit" // mcpu with default march // RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=sifive-u54 | FileCheck -check-prefix=MCPU-SIFIVE-U54 %s @@ -28,6 +28,20 @@ // MCPU-ABI-SIFIVE-U54: "-target-feature" "+c" "-target-feature" "+64bit" // MCPU-ABI-SIFIVE-U54: "-target-abi" "lp64" +// mcpu with default march +// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=sifive-e76 | FileCheck -check-prefix=MCPU-SIFIVE-E76 %s +// MCPU-SIFIVE-E76: "-nostdsysteminc" "-target-cpu" "sifive-e76" +// MCPU-SIFIVE-E76: "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d" +// MCPU-SIFIVE-E76: "-target-feature" "+c" +// MCPU-SIFIVE-E76: "-target-abi" "lp64d" + +// mcpu with mabi option +// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=sifive-u74 -mabi=lp64 | FileCheck -check-prefix=MCPU-ABI-SIFIVE-U74 %s +// MCPU-ABI-SIFIVE-U74: "-nostdsysteminc" "-target-cpu" "sifive-u74" +// MCPU-ABI-SIFIVE-U74: "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d" +// MCPU-ABI-SIFIVE-U74: "-target-feature" "+c" "-target-feature" "+64bit" +// MCPU-ABI-SIFIVE-U74: "-target-abi" "lp64" + // march overwrite mcpu's default march // RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=sifive-e31 -march=rv32imc | FileCheck -check-prefix=MCPU-MARCH %s // MCPU-MARCH: "-nostdsysteminc" "-target-cpu" "sifive-e31" "-target-feature" "+m" "-target-feature" "+c" diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index efcecbbc47264..309cb637c0c56 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -191,8 +191,8 @@ // RUN: not %clang_cc1 -triple riscv32 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV32 // RISCV32: error: unknown target CPU 'not-a-cpu' -// RISCV32: note: valid target CPU values are: generic-rv32, rocket-rv32, bullet-rv32, sifive-e31 +// RISCV32: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-7-rv32, sifive-e31, sifive-e76 // RUN: not %clang_cc1 -triple riscv64 -target-cpu not-a-cpu 
-fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV64 // RISCV64: error: unknown target CPU 'not-a-cpu' -// RISCV64: note: valid target CPU values are: generic-rv64, rocket-rv64, bullet-rv64, sifive-u54 +// RISCV64: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-7-rv64, sifive-u54, sifive-u74 diff --git a/llvm/include/llvm/Support/RISCVTargetParser.def b/llvm/include/llvm/Support/RISCVTargetParser.def index e6003a4fdebb9..a63874fa5dd02 100644 --- a/llvm/include/llvm/Support/RISCVTargetParser.def +++ b/llvm/include/llvm/Support/RISCVTargetParser.def @@ -7,9 +7,11 @@ PROC(GENERIC_RV32, {"generic-rv32"}, FK_NONE, {""}) PROC(GENERIC_RV64, {"generic-rv64"}, FK_64BIT, {""}) PROC(ROCKET_RV32, {"rocket-rv32"}, FK_NONE, {""}) PROC(ROCKET_RV64, {"rocket-rv64"}, FK_64BIT, {""}) -PROC(BULLET_RV32, {"bullet-rv32"}, FK_NONE, {""}) -PROC(BULLET_RV64, {"bullet-rv64"}, FK_64BIT, {""}) +PROC(BULLET_RV32, {"sifive-7-rv32"}, FK_NONE, {""}) +PROC(BULLET_RV64, {"sifive-7-rv64"}, FK_64BIT, {""}) PROC(SIFIVE_E31, {"sifive-e31"}, FK_NONE, {"rv32imac"}) PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"}) +PROC(SIFIVE_E76, {"sifive-e76"}, FK_NONE, {"rv32imafc"}) +PROC(SIFIVE_U74, {"sifive-u74"}, FK_64BIT, {"rv64gc"}) #undef PROC diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 240dad1ed5cae..1b2c471faac83 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -228,8 +228,8 @@ def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>; def : ProcessorModel<"rocket-rv32", RocketModel, []>; def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>; -def : ProcessorModel<"bullet-rv32", BulletModel, []>; -def : ProcessorModel<"bullet-rv64", BulletModel, [Feature64Bit]>; +def : ProcessorModel<"sifive-7-rv32", BulletModel, []>; +def : ProcessorModel<"sifive-7-rv64", BulletModel, [Feature64Bit]>; def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtM, FeatureStdExtA, @@ -242,6 +242,18 @@ def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit, FeatureStdExtD, FeatureStdExtC]>; +def : ProcessorModel<"sifive-e76", BulletModel, [FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtC]>; + +def : ProcessorModel<"sifive-u74", BulletModel, [Feature64Bit, + FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtD, + FeatureStdExtC]>; + //===----------------------------------------------------------------------===// // Define the RISC-V target. //===----------------------------------------------------------------------===// From 567462b48eba1c2d286ce97117994463f4535d2e Mon Sep 17 00:00:00 2001 From: peter klausler Date: Wed, 30 Sep 2020 13:04:44 -0700 Subject: [PATCH 075/321] [flang] Correct kP scaling on F output The sign of the scaling factor was misinterpreted for output as meaning what it does for input. To be correct, they should cancel each other out. print '(1P,F4.3)', 1. ! 
printed 0.1 but should print 10.0

Differential revision: https://reviews.llvm.org/D88610
---
 flang/runtime/edit-output.cpp     | 4 ++--
 flang/unittests/Runtime/hello.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp
index bae3606689e7d..145e01044144c 100644
--- a/flang/runtime/edit-output.cpp
+++ b/flang/runtime/edit-output.cpp
@@ -277,8 +277,8 @@ bool RealOutputEditing::EditFOutput(const DataEdit &edit) {
     return EmitPrefix(edit, converted.length, editWidth) &&
         io_.Emit(converted.str, converted.length) && EmitSuffix(edit);
   }
-  int scale{IsZero() ? -1 : edit.modes.scale};
-  int expo{converted.decimalExponent - scale};
+  int scale{IsZero() ? 1 : edit.modes.scale}; // kP
+  int expo{converted.decimalExponent + scale};
   if (expo > extraDigits && extraDigits >= 0) {
     extraDigits = expo;
     if (!edit.digits.has_value()) { // F0
diff --git a/flang/unittests/Runtime/hello.cpp b/flang/unittests/Runtime/hello.cpp
index bcd3bb448318e..d17c98e74c134 100644
--- a/flang/unittests/Runtime/hello.cpp
+++ b/flang/unittests/Runtime/hello.cpp
@@ -232,7 +232,7 @@ int main() {
       {"(G32.17E4,';')", " 1.0000000000000000 ;"},
       {"(1P,E32.17,';')", " 1.00000000000000000E+00;"},
       {"(1PE32.17,';')", " 1.00000000000000000E+00;"}, // no comma
-      {"(1P,F32.17,';')", " 0.10000000000000000;"},
+      {"(1P,F32.17,';')", " 10.00000000000000000;"},
       {"(1P,G32.17,';')", " 1.0000000000000000 ;"},
       {"(ES32.17,';')", " 1.00000000000000000E+00;"},
       {"(2P,E32.17,';')", " 10.0000000000000000E-01;"},

From e00f189d392dd9bf95f6a98f05f2d341d06cd65c Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Mon, 5 Oct 2020 22:35:59 +0300
Subject: [PATCH 076/321] [InstCombine] Revert rL226781 "Teach InstCombine to
 canonicalize loads which are only ever stored to always use a legal integer
 type if one is available." (PR47592)

(it was introduced in
https://lists.llvm.org/pipermail/llvm-dev/2015-January/080956.html)

This canonicalization seems dubious.

Most importantly, while it does not create `inttoptr` casts by itself,
it may cause them to appear later; see e.g. D88788.

I think it is pretty obvious that this is an undesirable outcome: by now
we have established that seemingly no-op `inttoptr`/`ptrtoint` casts are
not no-ops, and we are no longer eager to look past them. This means,
e.g., that given
```
%a = load i32
%b = inttoptr %a
%c = inttoptr %a
```
we likely won't be able to tell that `%b` and `%c` are the same thing.

As we can see in D88789 / D88788 / D88806 / D75505, we can't really
teach SCEV about this (not without fixing
https://bugs.llvm.org/show_bug.cgi?id=47592 first, at least), and we
can't recover the situation post-inlining in InstCombine.

So it really does look like this fold is actively breaking
otherwise-good IR in a way that is not recoverable, which means the fold
isn't even useful for exposing passes that are unaware of the patterns
it produces.

Thus, I propose to simply not perform such a canonicalization. The
original motivational RFC does not state what larger problem the
canonicalization was trying to solve, so I'm not sure how this plays out
in the larger picture.

On vanilla llvm test-suite + RawSpeed, this results in an increase of
asm instructions and final object size by ~+0.05%; it decreases the
final count of bitcasts by 4.79% (-28990), of ptrtoint casts by 15.41%
(-3423), and of inttoptr casts by 25.59% (-6919, *sic*). Overall, there
are 0.04% fewer IR blocks and 0.39% fewer instructions.
See https://bugs.llvm.org/show_bug.cgi?id=47592 Differential Revision: https://reviews.llvm.org/D88789 --- .../attr-arm-sve-vector-bits-bitcast.c | 18 ++--- .../CodeGen/attr-arm-sve-vector-bits-call.c | 78 +++++++++---------- .../CodeGen/attr-arm-sve-vector-bits-cast.c | 24 +++--- .../attr-arm-sve-vector-bits-globals.c | 6 +- .../InstCombineLoadStoreAlloca.cpp | 34 -------- llvm/test/Transforms/InstCombine/atomic.ll | 18 ++--- llvm/test/Transforms/InstCombine/load.ll | 44 +++++------ .../InstCombine/loadstore-metadata.ll | 19 +---- .../InstCombine/non-integral-pointers.ll | 16 ++-- .../instcombine-sroa-inttoptr.ll | 32 ++++---- 10 files changed, 107 insertions(+), 182 deletions(-) diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c index 3a5628d7f57e4..5df1e83b13bfa 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c @@ -255,22 +255,20 @@ svbool_t read_bool(struct struct_bool *s) { // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 // CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]] -// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to i32* -// CHECK-256-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 16, [[TBAA6]] -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1 -// CHECK-256-NEXT: [[TMP2:%.*]] = bitcast [3 x <4 x i8>]* [[Y]] to i32* -// CHECK-256-NEXT: store i32 [[TMP1]], i32* [[TMP2]], align 2, [[TBAA6]] +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <4 x i8>* +// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 16, [[TBAA6]] +// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-256-NEXT: store <4 x i8> [[TMP1]], <4 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: @write_bool( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 // CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]] -// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to i64* -// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]] -// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1 -// CHECK-512-NEXT: [[TMP2:%.*]] = bitcast [3 x <8 x i8>]* [[Y]] to i64* -// CHECK-512-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 2, [[TBAA6]] +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <8 x i8>* +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]] +// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-512-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]] // CHECK-512-NEXT: ret void // void write_bool(struct struct_bool *s, svbool_t x) { diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c index 5442d58e96bea..13979197999e5 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c @@ -169,28 +169,24 @@ fixed_float64_t call_float64_ff(svbool_t pg, fixed_float64_t op1, fixed_float64_ // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , 
align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64* -// CHECK-NEXT: [[OP113:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP2]] to * -// CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP2]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP2]] to i64* -// CHECK-NEXT: [[OP224:%.*]] = load i64, i64* [[TMP3]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64* -// CHECK-NEXT: store i64 [[OP113]], i64* [[TMP4]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to i64* -// CHECK-NEXT: store i64 [[OP224]], i64* [[TMP5]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP7:%.*]] = load , * [[TMP6]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to * -// CHECK-NEXT: [[TMP9:%.*]] = load , * [[TMP8]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP10:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[TMP7]], [[TMP9]]) -// CHECK-NEXT: store [[TMP10]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA13:!tbaa !.*]] -// CHECK-NEXT: [[TMP11:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* -// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP13:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* -// CHECK-NEXT: store i64 [[TMP12]], i64* [[TMP13]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = load , * [[RETVAL_COERCE]], align 16 -// CHECK-NEXT: ret [[TMP14]] +// CHECK-NEXT: [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 16, [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP2]] to * +// CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP1]], align 16 +// CHECK-NEXT: [[OP22:%.*]] = load <8 x i8>, <8 x i8>* [[OP2]], align 16, [[TBAA6]] +// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, [[TBAA6]] +// CHECK-NEXT: store <8 x i8> [[OP22]], <8 x i8>* [[OP2_ADDR]], align 16, [[TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to * +// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, [[TBAA6]] +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to * +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, [[TBAA6]] +// CHECK-NEXT: [[TMP6:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[TMP3]], [[TMP5]]) +// CHECK-NEXT: store [[TMP6]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA13:!tbaa !.*]] +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x i8>* +// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]] +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x i8>* +// CHECK-NEXT: store <8 x i8> [[TMP7]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP8]] // fixed_bool_t call_bool_ff(svbool_t pg, fixed_bool_t op1, fixed_bool_t op2) { return svsel(pg, op1, op2); @@ -260,20 +256,18 @@ fixed_float64_t call_float64_fs(svbool_t pg, fixed_float64_t op1, svfloat64_t op // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64* -// CHECK-NEXT: [[OP112:%.*]] = load i64, i64* [[TMP1]], 
align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64* -// CHECK-NEXT: store i64 [[OP112]], i64* [[TMP2]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[TMP4]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP5]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]] -// CHECK-NEXT: [[TMP6:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* -// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP8:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* -// CHECK-NEXT: store i64 [[TMP7]], i64* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load , * [[RETVAL_COERCE]], align 16 -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 16, [[TBAA6]] +// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to * +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA6]] +// CHECK-NEXT: [[TMP3:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[TMP2]], [[OP2:%.*]]) +// CHECK-NEXT: store [[TMP3]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]] +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x i8>* +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]] +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x i8>* +// CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP5]] // fixed_bool_t call_bool_fs(svbool_t pg, fixed_bool_t op1, svbool_t op2) { return svsel(pg, op1, op2); @@ -325,12 +319,12 @@ fixed_float64_t call_float64_ss(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]) // CHECK-NEXT: store [[TMP0]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]] -// CHECK-NEXT: [[TMP1:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP3:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* -// CHECK-NEXT: store i64 [[TMP2]], i64* [[TMP3]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = load , * [[RETVAL_COERCE]], align 16 -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x i8>* +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]] +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x i8>* +// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP2]] // fixed_bool_t call_bool_ss(svbool_t pg, svbool_t op1, svbool_t op2) { return svsel(pg, op1, op2); diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c index 17267d6038e49..8568c820ae6ff 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -81,13 +81,11 @@ fixed_float64_t from_svfloat64_t(svfloat64_t type) { // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <8 x i8>, align 16 
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[TYPE]] to * // CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE]] to i64* -// CHECK-NEXT: [[TYPE12:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to i64* -// CHECK-NEXT: store i64 [[TYPE12]], i64* [[TMP2]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to * -// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, [[TBAA6]] -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TYPE1:%.*]] = load <8 x i8>, <8 x i8>* [[TYPE]], align 16, [[TBAA6]] +// CHECK-NEXT: store <8 x i8> [[TYPE1]], <8 x i8>* [[TYPE_ADDR]], align 16, [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to * +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA6]] +// CHECK-NEXT: ret [[TMP2]] // svbool_t to_svbool_t(fixed_bool_t type) { return type; @@ -98,12 +96,12 @@ svbool_t to_svbool_t(fixed_bool_t type) { // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA13:!tbaa !.*]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to i64* -// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* -// CHECK-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load , * [[RETVAL_COERCE]], align 16 -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to <8 x i8>* +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]] +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x i8>* +// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP2]] // fixed_bool_t from_svbool_t(svbool_t type) { return type; diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c index 5babb9c7c410b..8f0d3c9f97e7a 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c @@ -72,9 +72,9 @@ void write_global_bf16(svbfloat16_t v) { global_bf16 = v; } // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca , align 16 // CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA13:!tbaa !.*]] -// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to i64* -// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA10]] -// CHECK-512-NEXT: store i64 [[TMP1]], i64* bitcast (<8 x i8>* @global_bool to i64*), align 2, [[TBAA10]] +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <8 x i8>* +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA10]] +// CHECK-512-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* @global_bool, align 2, [[TBAA10]] // CHECK-512-NEXT: ret void // void write_global_bool(svbool_t v) { global_bool = v; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 328cbde4da67d..b7bb34022eccf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -554,42 +554,8 @@ static Instruction 
*combineLoadToOperationType(InstCombinerImpl &IC, if (LI.getPointerOperand()->isSwiftError()) return nullptr; - Type *Ty = LI.getType(); const DataLayout &DL = IC.getDataLayout(); - // Try to canonicalize loads which are only ever stored to operate over - // integers instead of any other type. We only do this when the loaded type - // is sized and has a size exactly the same as its store size and the store - // size is a legal integer type. - // Do not perform canonicalization if minmax pattern is found (to avoid - // infinite loop). - Type *Dummy; - if (!Ty->isIntegerTy() && Ty->isSized() && !isa(Ty) && - DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) && - DL.typeSizeEqualsStoreSize(Ty) && !DL.isNonIntegralPointerType(Ty) && - !isMinMaxWithLoads(InstCombiner::peekThroughBitcast( - LI.getPointerOperand(), /*OneUseOnly=*/true), - Dummy)) { - if (all_of(LI.users(), [&LI](User *U) { - auto *SI = dyn_cast(U); - return SI && SI->getPointerOperand() != &LI && - !SI->getPointerOperand()->isSwiftError(); - })) { - LoadInst *NewLoad = IC.combineLoadToNewType( - LI, Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty))); - // Replace all the stores with stores of the newly loaded value. - for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { - auto *SI = cast(*UI++); - IC.Builder.SetInsertPoint(SI); - combineStoreToNewValue(IC, *SI, NewLoad); - IC.eraseInstFromFunction(*SI); - } - assert(LI.use_empty() && "Failed to remove all users of the load!"); - // Return the old load so the combiner can delete it safely. - return &LI; - } - } - // Fold away bit casts of the loaded value by loading the desired type. // We can do this for BitCastInsts as well as casts from and to pointer types, // as long as those are noops (i.e., the source or dest type have the same diff --git a/llvm/test/Transforms/InstCombine/atomic.ll b/llvm/test/Transforms/InstCombine/atomic.ll index 382d23d153098..fc134bcd7e4b3 100644 --- a/llvm/test/Transforms/InstCombine/atomic.ll +++ b/llvm/test/Transforms/InstCombine/atomic.ll @@ -325,11 +325,9 @@ declare void @clobber() define i32 @test18(float* %p) { ; CHECK-LABEL: @test18( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to i32* -; CHECK-NEXT: [[X1:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4 +; CHECK-NEXT: [[X:%.*]] = load atomic float, float* [[P:%.*]] unordered, align 4 ; CHECK-NEXT: call void @clobber() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[P]] to i32* -; CHECK-NEXT: store atomic i32 [[X1]], i32* [[TMP2]] unordered, align 4 +; CHECK-NEXT: store atomic float [[X]], float* [[P]] unordered, align 4 ; CHECK-NEXT: ret i32 0 ; %x = load atomic float, float* %p unordered, align 4 @@ -376,10 +374,8 @@ define i32 @test21(i32** %p, i8* %v) { define void @pr27490a(i8** %p1, i8** %p2) { ; CHECK-LABEL: @pr27490a( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[P1:%.*]] to i64* -; CHECK-NEXT: [[L1:%.*]] = load i64, i64* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[P2:%.*]] to i64* -; CHECK-NEXT: store volatile i64 [[L1]], i64* [[TMP2]], align 8 +; CHECK-NEXT: [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8 +; CHECK-NEXT: store volatile i8* [[L]], i8** [[P2:%.*]], align 8 ; CHECK-NEXT: ret void ; %l = load i8*, i8** %p1 @@ -389,10 +385,8 @@ define void @pr27490a(i8** %p1, i8** %p2) { define void @pr27490b(i8** %p1, i8** %p2) { ; CHECK-LABEL: @pr27490b( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[P1:%.*]] to i64* -; CHECK-NEXT: [[L1:%.*]] = load i64, i64* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[P2:%.*]] to 
i64* -; CHECK-NEXT: store atomic i64 [[L1]], i64* [[TMP2]] seq_cst, align 8 +; CHECK-NEXT: [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8 +; CHECK-NEXT: store atomic i8* [[L]], i8** [[P2:%.*]] seq_cst, align 8 ; CHECK-NEXT: ret void ; %l = load i8*, i8** %p1 diff --git a/llvm/test/Transforms/InstCombine/load.ll b/llvm/test/Transforms/InstCombine/load.ll index 4fc87219fa83e..032da41de6a5c 100644 --- a/llvm/test/Transforms/InstCombine/load.ll +++ b/llvm/test/Transforms/InstCombine/load.ll @@ -205,18 +205,16 @@ define i8 @test15(i8 %x, i32 %y) { define void @test16(i8* %x, i8* %a, i8* %b, i8* %c) { ; CHECK-LABEL: @test16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; CHECK-NEXT: [[X11:%.*]] = load i32, i32* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[A:%.*]] to i32* -; CHECK-NEXT: store i32 [[X11]], i32* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[B:%.*]] to i32* -; CHECK-NEXT: store i32 [[X11]], i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[X]] to i32* -; CHECK-NEXT: [[X22:%.*]] = load i32, i32* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[B]] to i32* -; CHECK-NEXT: store i32 [[X22]], i32* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[C:%.*]] to i32* -; CHECK-NEXT: store i32 [[X22]], i32* [[TMP5]], align 4 +; CHECK-NEXT: [[X_CAST:%.*]] = bitcast i8* [[X:%.*]] to float* +; CHECK-NEXT: [[A_CAST:%.*]] = bitcast i8* [[A:%.*]] to float* +; CHECK-NEXT: [[B_CAST:%.*]] = bitcast i8* [[B:%.*]] to float* +; CHECK-NEXT: [[X1:%.*]] = load float, float* [[X_CAST]], align 4 +; CHECK-NEXT: store float [[X1]], float* [[A_CAST]], align 4 +; CHECK-NEXT: store float [[X1]], float* [[B_CAST]], align 4 +; CHECK-NEXT: [[X2:%.*]] = load float, float* [[X_CAST]], align 4 +; CHECK-NEXT: store float [[X2]], float* [[B_CAST]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[C:%.*]] to float* +; CHECK-NEXT: store float [[X2]], float* [[TMP0]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -240,18 +238,16 @@ entry: define void @test16-vect(i8* %x, i8* %a, i8* %b, i8* %c) { ; CHECK-LABEL: @test16-vect( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; CHECK-NEXT: [[X11:%.*]] = load i32, i32* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[A:%.*]] to i32* -; CHECK-NEXT: store i32 [[X11]], i32* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[B:%.*]] to i32* -; CHECK-NEXT: store i32 [[X11]], i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[X]] to i32* -; CHECK-NEXT: [[X22:%.*]] = load i32, i32* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[B]] to i32* -; CHECK-NEXT: store i32 [[X22]], i32* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[C:%.*]] to i32* -; CHECK-NEXT: store i32 [[X22]], i32* [[TMP5]], align 4 +; CHECK-NEXT: [[X_CAST:%.*]] = bitcast i8* [[X:%.*]] to <4 x i8>* +; CHECK-NEXT: [[A_CAST:%.*]] = bitcast i8* [[A:%.*]] to <4 x i8>* +; CHECK-NEXT: [[B_CAST:%.*]] = bitcast i8* [[B:%.*]] to <4 x i8>* +; CHECK-NEXT: [[X1:%.*]] = load <4 x i8>, <4 x i8>* [[X_CAST]], align 4 +; CHECK-NEXT: store <4 x i8> [[X1]], <4 x i8>* [[A_CAST]], align 4 +; CHECK-NEXT: store <4 x i8> [[X1]], <4 x i8>* [[B_CAST]], align 4 +; CHECK-NEXT: [[X2:%.*]] = load <4 x i8>, <4 x i8>* [[X_CAST]], align 4 +; CHECK-NEXT: store <4 x i8> [[X2]], <4 x i8>* [[B_CAST]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[C:%.*]] to <4 x i8>* +; CHECK-NEXT: store <4 x i8> [[X2]], <4 x i8>* [[TMP0]], align 4 ; 
CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll index e443c6ed00759..42f6900c2ab5e 100644 --- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll +++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll @@ -161,24 +161,11 @@ exit: } define void @test_load_cast_combine_nonnull(float** %ptr) { -; We can't preserve nonnull metadata when converting a load of a pointer to -; a load of an integer. Instead, we translate it to range metadata. -; FIXME: We should also transform range metadata back into nonnull metadata. -; FIXME: This test is very fragile. If any LABEL lines are added after -; this point, the test will fail, because this test depends on a metadata tuple, -; which is always emitted at the end of the file. At some point, we should -; consider an option to the IR printer to emit MD tuples after the function -; that first uses them--this will allow us to refer to them like this and not -; have the tests break. For now, this function must always come last in this -; file, and no LABEL lines are to be added after this point. -; ; CHECK-LABEL: @test_load_cast_combine_nonnull( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float** [[PTR:%.*]] to i64* -; CHECK-NEXT: [[P1:%.*]] = load i64, i64* [[TMP0]], align 8, !range ![[MD:[0-9]+]] +; CHECK-NEXT: [[P:%.*]] = load float*, float** [[PTR:%.*]], align 8, !nonnull !7 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr float*, float** [[PTR]], i64 42 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float** [[GEP]] to i64* -; CHECK-NEXT: store i64 [[P1]], i64* [[TMP1]], align 8 +; CHECK-NEXT: store float* [[P]], float** [[GEP]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -188,8 +175,6 @@ entry: ret void } -; This is the metadata tuple that we reference above: -; CHECK: ![[MD]] = !{i64 1, i64 0} !0 = !{!1, !1, i64 0} !1 = !{!"scalar type", !2} !2 = !{!"root"} diff --git a/llvm/test/Transforms/InstCombine/non-integral-pointers.ll b/llvm/test/Transforms/InstCombine/non-integral-pointers.ll index e8f0013604a9c..a1166bf491936 100644 --- a/llvm/test/Transforms/InstCombine/non-integral-pointers.ll +++ b/llvm/test/Transforms/InstCombine/non-integral-pointers.ll @@ -41,10 +41,8 @@ define void @f_3(i8 addrspace(3)** %ptr0, i8 addrspace(3)** %ptr1) { ; integers, since pointers in address space 3 are integral. 
; CHECK-LABEL: @f_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 addrspace(3)** [[PTR0:%.*]] to i64* -; CHECK-NEXT: [[VAL1:%.*]] = load i64, i64* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)** [[PTR1:%.*]] to i64* -; CHECK-NEXT: store i64 [[VAL1]], i64* [[TMP1]], align 8 +; CHECK-NEXT: [[VAL:%.*]] = load i8 addrspace(3)*, i8 addrspace(3)** [[PTR0:%.*]], align 8 +; CHECK-NEXT: store i8 addrspace(3)* [[VAL]], i8 addrspace(3)** [[PTR1:%.*]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -79,13 +77,13 @@ define i64 @g(i8 addrspace(4)** %gp) { define i64 @g2(i8* addrspace(4)* %gp) { ; CHECK-LABEL: @g2( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* addrspace(4)* [[GP:%.*]] to i64 addrspace(4)* -; CHECK-NEXT: [[DOTPRE1:%.*]] = load i64, i64 addrspace(4)* [[TMP1]], align 8 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i8*, i8* addrspace(4)* [[GP:%.*]], align 8 ; CHECK-NEXT: [[V74:%.*]] = call i8 addrspace(4)* @alloc() ; CHECK-NEXT: [[V77:%.*]] = getelementptr i8, i8 addrspace(4)* [[V74]], i64 -8 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(4)* [[V77]] to i64 addrspace(4)* -; CHECK-NEXT: store i64 [[DOTPRE1]], i64 addrspace(4)* [[TMP2]], align 8 -; CHECK-NEXT: ret i64 [[DOTPRE1]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[V77]] to i8* addrspace(4)* +; CHECK-NEXT: store i8* [[DOTPRE]], i8* addrspace(4)* [[TMP1]], align 8 +; CHECK-NEXT: [[V81_CAST:%.*]] = ptrtoint i8* [[DOTPRE]] to i64 +; CHECK-NEXT: ret i64 [[V81_CAST]] ; %.pre = load i8*, i8* addrspace(4)* %gp, align 8 %v74 = call i8 addrspace(4)* @alloc() diff --git a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll index 6de0282e9448a..3308a0ecc7fad 100644 --- a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll @@ -50,10 +50,10 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 define dso_local void @_Z3gen1S(%0* noalias sret align 8 %arg, %0* byval(%0) align 8 %arg1) { ; CHECK-LABEL: @_Z3gen1S( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %0* [[ARG1:%.*]] to i64* -; CHECK-NEXT: [[I21:%.*]] = load i64, i64* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast %0* [[ARG:%.*]] to i64* -; CHECK-NEXT: store i64 [[I21]], i64* [[TMP1]], align 8 +; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG1:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[I2:%.*]] = load i32*, i32** [[I]], align 8 +; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0 +; CHECK-NEXT: store i32* [[I2]], i32** [[I3]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -68,13 +68,12 @@ define dso_local i32* @_Z3foo1S(%0* byval(%0) align 8 %arg) { ; CHECK-LABEL: @_Z3foo1S( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I2:%.*]] = alloca [[TMP0:%.*]], align 8 -; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_CAST:%.*]] = bitcast %0* [[ARG:%.*]] to i64* -; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[I1_SROA_0_0_I5_SROA_CAST]], align 8 -; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_CAST:%.*]] = bitcast %0* [[I2]] to i64* -; CHECK-NEXT: store i64 [[I1_SROA_0_0_COPYLOAD]], i64* [[I_SROA_0_0_I6_SROA_CAST]], align 8 +; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I5_SROA_IDX]], align 8 +; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I2]], i64 0, 
i32 0
+; CHECK-NEXT:    store i32* [[I1_SROA_0_0_COPYLOAD]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8
 ; CHECK-NEXT:    tail call void @_Z7escape01S(%0* nonnull byval(%0) align 8 [[I2]])
-; CHECK-NEXT:    [[TMP0]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT:    ret i32* [[TMP0]]
+; CHECK-NEXT:    ret i32* [[I1_SROA_0_0_COPYLOAD]]
 ;
 bb:
   %i = alloca %0, align 8
@@ -108,24 +107,21 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
 define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) {
 ; CHECK-LABEL: @_Z3bar1S(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[I1_SROA_0_0_I4_SROA_CAST:%.*]] = bitcast %0* [[ARG:%.*]] to i64*
-; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[I1_SROA_0_0_I4_SROA_CAST]], align 8
+; CHECK-NEXT:    [[I1_SROA_0_0_I4_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I4_SROA_IDX]], align 8
 ; CHECK-NEXT:    [[I5:%.*]] = tail call i32 @_Z4condv()
 ; CHECK-NEXT:    [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0
 ; CHECK-NEXT:    br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]]
 ; CHECK:       bb7:
 ; CHECK-NEXT:    tail call void @_Z5sync0v()
-; CHECK-NEXT:    [[TMP0:%.*]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT:    tail call void @_Z7escape0Pi(i32* [[TMP0]])
+; CHECK-NEXT:    tail call void @_Z7escape0Pi(i32* [[I1_SROA_0_0_COPYLOAD]])
 ; CHECK-NEXT:    br label [[BB13:%.*]]
 ; CHECK:       bb10:
 ; CHECK-NEXT:    tail call void @_Z5sync1v()
-; CHECK-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT:    tail call void @_Z7escape1Pi(i32* [[TMP1]])
+; CHECK-NEXT:    tail call void @_Z7escape1Pi(i32* [[I1_SROA_0_0_COPYLOAD]])
 ; CHECK-NEXT:    br label [[BB13]]
 ; CHECK:       bb13:
-; CHECK-NEXT:    [[DOTPRE_PHI:%.*]] = phi i32* [ [[TMP1]], [[BB10]] ], [ [[TMP0]], [[BB7]] ]
-; CHECK-NEXT:    ret i32* [[DOTPRE_PHI]]
+; CHECK-NEXT:    ret i32* [[I1_SROA_0_0_COPYLOAD]]
 ;
 bb:
   %i = alloca %0, align 8

From 1b31b50d384b5f25221ac268ef781d26f5beacc1 Mon Sep 17 00:00:00 2001
From: ergawy
Date: Mon, 5 Oct 2020 16:39:39 -0400
Subject: [PATCH 077/321] [MLIR][SPIRV] Extend _reference_of to support
 SpecConstantCompositeOp.

Adds support for SPIR-V composite specialization constants to
spv._reference_of.

Reviewed By: antiagainst

Differential Revision: https://reviews.llvm.org/D88732
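An illustrative sketch of what this enables, adapted and abridged from the
`spec-constant.mlir` test updated below (op spellings follow this revision of
the dialect; the exact assembly is a hand-written approximation):

```
spv.specConstant @sc_int = 42 : i32
spv.specConstantComposite @scc (@sc_int, @sc_int) : !spv.array<2 x i32>

spv.func @use_composite() -> (!spv.array<2 x i32>) "None" {
  // A spv._reference_of is materialized at each use of the composite:
  %0 = spv._reference_of @scc : !spv.array<2 x i32>
  spv.ReturnValue %0 : !spv.array<2 x i32>
}
```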
This operation has no corresponding SPIR-V instruction; it's merely used
diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp
index 363785e2b7822..ad25ecb427a6c 100644
--- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp
+++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp
@@ -2568,17 +2568,27 @@ static LogicalResult verify(spirv::ModuleOp moduleOp) {
 //===----------------------------------------------------------------------===//

 static LogicalResult verify(spirv::ReferenceOfOp referenceOfOp) {
- auto specConstOp = dyn_cast_or_null<spirv::SpecConstantOp>(
- SymbolTable::lookupNearestSymbolFrom(referenceOfOp.getParentOp(),
- referenceOfOp.spec_const()));
- if (!specConstOp) {
- return referenceOfOp.emitOpError("expected spv.specConstant symbol");
- }
- if (referenceOfOp.reference().getType() !=
- specConstOp.default_value().getType()) {
+ auto *specConstSym = SymbolTable::lookupNearestSymbolFrom(
+ referenceOfOp.getParentOp(), referenceOfOp.spec_const());
+ Type constType;
+
+ auto specConstOp = dyn_cast_or_null<spirv::SpecConstantOp>(specConstSym);
+ if (specConstOp)
+ constType = specConstOp.default_value().getType();
+
+ auto specConstCompositeOp =
+ dyn_cast_or_null<spirv::SpecConstantCompositeOp>(specConstSym);
+ if (specConstCompositeOp)
+ constType = specConstCompositeOp.type();
+
+ if (!specConstOp && !specConstCompositeOp)
+ return referenceOfOp.emitOpError(
+ "expected spv.specConstant or spv.SpecConstantComposite symbol");
+
+ if (referenceOfOp.reference().getType() != constType)
 return referenceOfOp.emitOpError("result type mismatch with the referenced "
 "specialization constant's type");
- }
+
 return success();
}
diff --git a/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp b/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp
index 153540ddb2811..33966f8b21e92 100644
--- a/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp
+++ b/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp
@@ -187,6 +187,11 @@ class Deserializer {
 return specConstMap.lookup(id);
 }

+ /// Gets the composite specialization constant with the given result <id>.
+ spirv::SpecConstantCompositeOp getSpecConstantComposite(uint32_t id) {
+ return specConstCompositeMap.lookup(id);
+ }
+
 /// Creates a spirv::SpecConstantOp.
 spirv::SpecConstantOp createSpecConstant(Location loc, uint32_t resultID,
 Attribute defaultValue);
@@ -461,9 +466,12 @@ class Deserializer {
 /// (and type) here. Later when it's used, we materialize the constant.
 DenseMap<uint32_t, std::pair<Attribute, Type>> constantMap;

- // Result <id> to variable mapping.
+ // Result <id> to spec constant mapping.
 DenseMap<uint32_t, spirv::SpecConstantOp> specConstMap;

+ // Result <id> to composite spec constant mapping.
+ DenseMap<uint32_t, spirv::SpecConstantCompositeOp> specConstCompositeMap;
+
+ // Result <id> to variable mapping.
DenseMap<uint32_t, spirv::GlobalVariableOp> globalVariableMap;
@@ -1565,7 +1573,8 @@ Deserializer::processSpecConstantComposite(ArrayRef<uint32_t> operands) {
 << operands[0];
 }

- auto symName = opBuilder.getStringAttr(getSpecConstantSymbol(operands[1]));
+ auto resultID = operands[1];
+ auto symName = opBuilder.getStringAttr(getSpecConstantSymbol(resultID));

 SmallVector<Attribute, 4> elements;
 elements.reserve(operands.size() - 2);
@@ -1574,9 +1583,10 @@ Deserializer::processSpecConstantComposite(ArrayRef<uint32_t> operands) {
 elements.push_back(opBuilder.getSymbolRefAttr(elementInfo));
 }

- opBuilder.create<spirv::SpecConstantCompositeOp>(
+ auto op = opBuilder.create<spirv::SpecConstantCompositeOp>(
 unknownLoc, TypeAttr::get(resultType), symName,
 opBuilder.getArrayAttr(elements));
+ specConstCompositeMap[resultID] = op;

 return success();
}
@@ -2208,6 +2218,12 @@ Value Deserializer::getValue(uint32_t id) {
 opBuilder.getSymbolRefAttr(constOp.getOperation()));
 return referenceOfOp.reference();
 }
+ if (auto constCompositeOp = getSpecConstantComposite(id)) {
+ auto referenceOfOp = opBuilder.create<spirv::ReferenceOfOp>(
+ unknownLoc, constCompositeOp.type(),
+ opBuilder.getSymbolRefAttr(constCompositeOp.getOperation()));
+ return referenceOfOp.reference();
+ }
 if (auto undef = getUndefType(id)) {
 return opBuilder.create<spirv::UndefOp>(unknownLoc, undef);
 }
diff --git a/mlir/test/Dialect/SPIRV/Serialization/spec-constant.mlir b/mlir/test/Dialect/SPIRV/Serialization/spec-constant.mlir
index 0df930162c746..2cbfcc6d219d1 100644
--- a/mlir/test/Dialect/SPIRV/Serialization/spec-constant.mlir
+++ b/mlir/test/Dialect/SPIRV/Serialization/spec-constant.mlir
@@ -12,6 +12,9 @@ spv.module Logical GLSL450 requires #spv.vce {
 // CHECK: spv.specConstant @sc_float spec_id(5) = 1.000000e+00 : f32
 spv.specConstant @sc_float spec_id(5) = 1. : f32

+ // CHECK: spv.specConstantComposite @scc (@sc_int, @sc_int) : !spv.array<2 x i32>
+ spv.specConstantComposite @scc (@sc_int, @sc_int) : !spv.array<2 x i32>
+
 // CHECK-LABEL: @use
 spv.func @use() -> (i32) "None" {
 // We materialize a `spv._reference_of` op at every use of a
@@ -24,6 +27,43 @@ spv.module Logical GLSL450 requires #spv.vce {
 %1 = spv.IAdd %0, %0 : i32
 spv.ReturnValue %1 : i32
 }
+
+ // CHECK-LABEL: @use
+ spv.func @use_composite() -> (i32) "None" {
+ // We materialize a `spv._reference_of` op at every use of a
+ // specialization constant in the deserializer. So two ops here.
+ // CHECK: %[[USE1:.*]] = spv._reference_of @scc : !spv.array<2 x i32>
+ // CHECK: %[[ITM0:.*]] = spv.CompositeExtract %[[USE1]][0 : i32] : !spv.array<2 x i32>
+ // CHECK: %[[USE2:.*]] = spv._reference_of @scc : !spv.array<2 x i32>
+ // CHECK: %[[ITM1:.*]] = spv.CompositeExtract %[[USE2]][1 : i32] : !spv.array<2 x i32>
+ // CHECK: spv.IAdd %[[ITM0]], %[[ITM1]]
+
+ %0 = spv._reference_of @scc : !spv.array<2 x i32>
+ %1 = spv.CompositeExtract %0[0 : i32] : !spv.array<2 x i32>
+ %2 = spv.CompositeExtract %0[1 : i32] : !spv.array<2 x i32>
+ %3 = spv.IAdd %1, %2 : i32
+ spv.ReturnValue %3 : i32
+ }
+}
+
+// -----
+
+spv.module Logical GLSL450 requires #spv.vce {
+
+ spv.specConstant @sc_f32_1 = 1.5 : f32
+ spv.specConstant @sc_f32_2 = 2.5 : f32
+ spv.specConstant @sc_f32_3 = 3.5 : f32
+
+ spv.specConstant @sc_i32_1 = 1 : i32
+
+ // CHECK: spv.specConstantComposite @scc_array (@sc_f32_1, @sc_f32_2, @sc_f32_3) : !spv.array<3 x f32>
+ spv.specConstantComposite @scc_array (@sc_f32_1, @sc_f32_2, @sc_f32_3) : !spv.array<3 x f32>
+
+ // CHECK: spv.specConstantComposite @scc_struct (@sc_i32_1, @sc_f32_2, @sc_f32_3) : !spv.struct<i32, f32, f32>
+ spv.specConstantComposite @scc_struct (@sc_i32_1, @sc_f32_2, @sc_f32_3) : !spv.struct<i32, f32, f32>
+
+ // CHECK: spv.specConstantComposite @scc_vector (@sc_f32_1, @sc_f32_2, @sc_f32_3) : vector<3xf32>
+ spv.specConstantComposite @scc_vector (@sc_f32_1, @sc_f32_2, @sc_f32_3) : vector<3 x f32>
}

// -----
diff --git a/mlir/test/Dialect/SPIRV/structure-ops.mlir b/mlir/test/Dialect/SPIRV/structure-ops.mlir
index 765eba959a26b..7bb98b92c3d28 100644
--- a/mlir/test/Dialect/SPIRV/structure-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/structure-ops.mlir
@@ -496,6 +496,8 @@ spv.module Logical GLSL450 {
 spv.specConstant @sc2 = 42 : i64
 spv.specConstant @sc3 = 1.5 : f32

+ spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : !spv.struct<i1, i64, f32>
+
 // CHECK-LABEL: @reference
 spv.func @reference() -> i1 "None" {
 // CHECK: spv._reference_of @sc1 : i1
@@ -503,6 +505,14 @@ spv.module Logical GLSL450 {
 spv.ReturnValue %0 : i1
 }

+ // CHECK-LABEL: @reference_composite
+ spv.func @reference_composite() -> i1 "None" {
+ // CHECK: spv._reference_of @scc : !spv.struct<i1, i64, f32>
+ %0 = spv._reference_of @scc : !spv.struct<i1, i64, f32>
+ %1 = spv.CompositeExtract %0[0 : i32] : !spv.struct<i1, i64, f32>
+ spv.ReturnValue %1 : i1
+ }
+
 // CHECK-LABEL: @initialize
 spv.func @initialize() -> i64 "None" {
 // CHECK: spv._reference_of @sc2 : i64
@@ -534,9 +544,21 @@ func @reference_of() {

// -----

+spv.specConstant @sc = 5 : i32
+spv.specConstantComposite @scc (@sc) : !spv.array<1 x i32>
+
+func @reference_of_composite() {
+ // CHECK: spv._reference_of @scc : !spv.array<1 x i32>
+ %0 = spv._reference_of @scc : !spv.array<1 x i32>
+ %1 = spv.CompositeExtract %0[0 : i32] : !spv.array<1 x i32>
+ return
+}
+
+// -----
+
 spv.module Logical GLSL450 {
 spv.func @foo() -> () "None" {
- // expected-error @+1 {{expected spv.specConstant symbol}}
+ // expected-error @+1 {{expected spv.specConstant or spv.SpecConstantComposite symbol}}
 %0 = spv._reference_of @sc : i32
 spv.Return
 }
@@ -555,6 +577,18 @@ spv.module Logical GLSL450 {

// -----

+spv.module Logical GLSL450 {
+ spv.specConstant @sc = 42 : i32
+ spv.specConstantComposite @scc (@sc) : !spv.array<1 x i32>
+ spv.func @foo() -> () "None" {
+ // expected-error @+1 {{result type mismatch with the referenced specialization constant's type}}
+ %0 = spv._reference_of @scc : f32
+ spv.Return
+ }
+}
+
+// -----
+
 //===----------------------------------------------------------------------===//
 // spv.specConstant
//===----------------------------------------------------------------------===// From 6bec45e2558566e10be71280a3e2c1b144f1b236 Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Mon, 5 Oct 2020 16:05:14 -0500 Subject: [PATCH 078/321] [SVE] Add legalisation tests to sve-fixed-length-int-reduce.ll --- .../AArch64/sve-fixed-length-int-reduce.ll | 200 ++++++++++++++++++ 1 file changed, 200 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll index 8e3ef87908f73..633fe19efb026 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll @@ -61,6 +61,16 @@ define i8 @uaddv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_512-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].b ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b +; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].b +; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <64 x i8>, <64 x i8>* %a %res = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> %op) ret i8 %res @@ -127,6 +137,16 @@ define i16 @uaddv_v32i16(<32 x i16>* %a) #0 { ; VBITS_GE_512-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].h ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].h +; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a %res = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> %op) ret i16 %res @@ -193,6 +213,16 @@ define i32 @uaddv_v16i32(<16 x i32>* %a) #0 { ; VBITS_GE_512-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].s ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].s +; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %op) ret i32 %res @@ -259,6 +289,16 @@ define i64 @uaddv_v8i64(<8 x i64>* %a) #0 { ; VBITS_GE_512-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].d +; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %op) ret i64 %res @@ -329,6 +369,16 @@ define i8 @smaxv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_512-NEXT: smaxv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b +; VBITS_EQ_256-DAG: smaxv b[[REDUCE:[0-9]+]], [[PG]], [[MAX]].b +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <64 x i8>, <64 x i8>* %a %res = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> %op) ret i8 %res @@ -395,6 +445,16 @@ define i16 @smaxv_v32i16(<32 x i16>* %a) #0 { ; VBITS_GE_512-NEXT: smaxv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: smaxv h[[REDUCE:[0-9]+]], [[PG]], [[MAX]].h +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a %res = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> %op) ret i16 %res @@ -461,6 +521,16 @@ define i32 @smaxv_v16i32(<16 x i32>* %a) #0 { ; VBITS_GE_512-NEXT: smaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s ; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: smaxv [[REDUCE:s[0-9]+]], [[PG]], [[MAX]].s +; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %op) ret i32 %res @@ -529,6 +599,16 @@ define i64 @smaxv_v8i64(<8 x i64>* %a) #0 { ; VBITS_GE_512-NEXT: smaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: smaxv [[REDUCE:d[0-9]+]], [[PG]], [[MAX]].d +; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> %op) ret i64 %res @@ -599,6 +679,16 @@ define i8 @sminv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_512-NEXT: sminv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b +; VBITS_EQ_256-DAG: sminv b[[REDUCE:[0-9]+]], [[PG]], [[MIN]].b +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <64 x i8>, <64 x i8>* %a %res = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> %op) ret i8 %res @@ -665,6 +755,16 @@ define i16 @sminv_v32i16(<32 x i16>* %a) #0 { ; VBITS_GE_512-NEXT: sminv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: sminv h[[REDUCE:[0-9]+]], [[PG]], [[MIN]].h +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a %res = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> %op) ret i16 %res @@ -731,6 +831,16 @@ define i32 @sminv_v16i32(<16 x i32>* %a) #0 { ; VBITS_GE_512-NEXT: sminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s ; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: sminv [[REDUCE:s[0-9]+]], [[PG]], [[MIN]].s +; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %op) ret i32 %res @@ -799,6 +909,16 @@ define i64 @sminv_v8i64(<8 x i64>* %a) #0 { ; VBITS_GE_512-NEXT: sminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: sminv [[REDUCE:d[0-9]+]], [[PG]], [[MIN]].d +; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> %op) ret i64 %res @@ -869,6 +989,16 @@ define i8 @umaxv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_512-NEXT: umaxv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b +; VBITS_EQ_256-DAG: umaxv b[[REDUCE:[0-9]+]], [[PG]], [[MAX]].b +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <64 x i8>, <64 x i8>* %a %res = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> %op) ret i8 %res @@ -935,6 +1065,16 @@ define i16 @umaxv_v32i16(<32 x i16>* %a) #0 { ; VBITS_GE_512-NEXT: umaxv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: umaxv h[[REDUCE:[0-9]+]], [[PG]], [[MAX]].h +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a %res = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> %op) ret i16 %res @@ -1001,6 +1141,16 @@ define i32 @umaxv_v16i32(<16 x i32>* %a) #0 { ; VBITS_GE_512-NEXT: umaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s ; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: umaxv [[REDUCE:s[0-9]+]], [[PG]], [[MAX]].s +; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %op) ret i32 %res @@ -1069,6 +1219,16 @@ define i64 @umaxv_v8i64(<8 x i64>* %a) #0 { ; VBITS_GE_512-NEXT: umaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: umaxv [[REDUCE:d[0-9]+]], [[PG]], [[MAX]].d +; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> %op) ret i64 %res @@ -1139,6 +1299,16 @@ define i8 @uminv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_512-NEXT: uminv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b +; VBITS_EQ_256-DAG: uminv b[[REDUCE:[0-9]+]], [[PG]], [[MIN]].b +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <64 x i8>, <64 x i8>* %a %res = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> %op) ret i8 %res @@ -1205,6 +1375,16 @@ define i16 @uminv_v32i16(<32 x i16>* %a) #0 { ; VBITS_GE_512-NEXT: uminv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: uminv h[[REDUCE:[0-9]+]], [[PG]], [[MIN]].h +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a %res = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> %op) ret i16 %res @@ -1271,6 +1451,16 @@ define i32 @uminv_v16i32(<16 x i32>* %a) #0 { ; VBITS_GE_512-NEXT: uminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s ; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: uminv [[REDUCE:s[0-9]+]], [[PG]], [[MIN]].s +; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %op) ret i32 %res @@ -1339,6 +1529,16 @@ define i64 @uminv_v8i64(<8 x i64>* %a) #0 { ; VBITS_GE_512-NEXT: uminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: uminv [[REDUCE:d[0-9]+]], [[PG]], [[MIN]].d +; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> %op) ret i64 %res From 9afb1c566e8cb396da495e2fbbbc53e1814cc3a1 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Mon, 5 Oct 2020 13:41:12 -0700 Subject: [PATCH 079/321] Revert "Outline non returning functions unless a longjmp" This reverts commit 20797989ea190f2ef22d13c5a7a0535fe9afa58b. This patch (https://reviews.llvm.org/D69257) cannot complete a stage2 build due to the change: ``` CI->getCalledFunction()->getName().contains("longjmp") ``` There are several concrete issues here: - The callee may not be a function, so `getCalledFunction` can assert. - The called value may not have a name, so `getName` can assert. - There's no distinction made between "my_longjmp_test_helper" and the actual longjmp libcall. At a higher level, there's a serious layering problem here. The splitting pass makes policy decisions in a general way (e.g. based on attributes or profile data). Special-casing certain names breaks the layering. It subverts the work of library maintainers (who may now need to opt-out of unexpected optimization behavior for any affected functions) and can lead to inconsistent optimization behavior (as not all llvm passes special-case ".*longjmp.*" in the same way). The patch may need significant revision to address these issues. But the immediate issue is that this crashes while compiling llvm's unit tests in a stage2 build (due to the `getName` problem). --- llvm/lib/Transforms/IPO/HotColdSplitting.cpp | 18 +-- .../HotColdSplit/longjmp-nosplit.ll | 97 ------------- .../Transforms/HotColdSplit/longjmp-split.ll | 132 ----------------- .../Transforms/HotColdSplit/sjlj-nosplit.ll | 103 ------------- .../Transforms/HotColdSplit/sjlj-split.ll | 136 ------------------ .../HotColdSplit/split-assert-fail.ll | 47 ------ 6 files changed, 5 insertions(+), 528 deletions(-) delete mode 100644 llvm/test/Transforms/HotColdSplit/longjmp-nosplit.ll delete mode 100644 llvm/test/Transforms/HotColdSplit/longjmp-split.ll delete mode 100644 llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll delete mode 100644 llvm/test/Transforms/HotColdSplit/sjlj-split.ll delete mode 100644 llvm/test/Transforms/HotColdSplit/split-assert-fail.ll diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index ad7df0607377c..2460099fba438 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -113,8 +113,7 @@ bool blockEndsInUnreachable(const BasicBlock &BB) { return !(isa(I) || isa(I)); } -bool unlikelyExecuted(BasicBlock &BB, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { +bool unlikelyExecuted(BasicBlock &BB) { // Exception handling blocks are unlikely executed. if (BB.isEHPad() || isa(BB.getTerminator())) return true; @@ -127,19 +126,12 @@ bool unlikelyExecuted(BasicBlock &BB, ProfileSummaryInfo *PSI, return true; // The block is cold if it has an unreachable terminator, unless it's - // preceded by a call to a (possibly warm) noreturn call (e.g. 
longjmp); - // in the case of a longjmp, if the block is cold according to - // profile information, we mark it as unlikely to be executed as well. + // preceded by a call to a (possibly warm) noreturn call (e.g. longjmp). if (blockEndsInUnreachable(BB)) { if (auto *CI = dyn_cast_or_null(BB.getTerminator()->getPrevNode())) - if (CI->hasFnAttr(Attribute::NoReturn)) { - if (IntrinsicInst *II = dyn_cast(CI)) - return (II->getIntrinsicID() != Intrinsic::eh_sjlj_longjmp) || - (BFI && PSI->isColdBlock(&BB, BFI)); - return !CI->getCalledFunction()->getName().contains("longjmp") || - (BFI && PSI->isColdBlock(&BB, BFI)); - } + if (CI->hasFnAttr(Attribute::NoReturn)) + return false; return true; } @@ -599,7 +591,7 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { continue; bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) || - (EnableStaticAnalysis && unlikelyExecuted(*BB, PSI, BFI)); + (EnableStaticAnalysis && unlikelyExecuted(*BB)); if (!Cold) continue; diff --git a/llvm/test/Transforms/HotColdSplit/longjmp-nosplit.ll b/llvm/test/Transforms/HotColdSplit/longjmp-nosplit.ll deleted file mode 100644 index d207e8dae3850..0000000000000 --- a/llvm/test/Transforms/HotColdSplit/longjmp-nosplit.ll +++ /dev/null @@ -1,97 +0,0 @@ -; RUN: opt -hotcoldsplit -S < %s | FileCheck %s -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct.__jmp_buf_tag = type { [8 x i64], i32, %struct.__sigset_t } -%struct.__sigset_t = type { [16 x i64] } - -@c = dso_local global i32 1, align 4 -@buf = dso_local global [20 x i8*] zeroinitializer, align 16 - -; CHECK-LABEL: @f -; CHECK-NOT: f.cold.1 -define dso_local void @f() #0 { -entry: - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - %0 = load i32, i32* @c, align 4 - %tobool = icmp ne i32 %0, 0 - br i1 %tobool, label %if.then, label %if.else - -if.then: ; preds = %entry - ret void - -if.else: ; preds = %entry - %1 = load i32, i32* @c, align 4 - %inc = add i32 %1, 1 - store i32 %inc, i32* @c, align 4 - %2 = load i32, i32* @c, align 4 - %inc1 = add i32 %2, 1 - store i32 %inc1, i32* @c, align 4 - %3 = load i32, i32* @c, align 4 - %inc2 = add i32 %3, 1 - store i32 %inc2, i32* @c, align 4 - %4 = load i32, i32* @c, align 4 - %inc3 = add i32 %4, 1 - store i32 %inc3, i32* @c, align 4 - %5 = load i32, i32* @c, align 4 - %dec = add i32 %5, -1 - store i32 %dec, i32* @c, align 4 - %6 = load i32, i32* @c, align 4 - %dec4 = add i32 %6, -1 - store i32 %dec4, i32* @c, align 4 - %7 = load i32, i32* @c, align 4 - %inc5 = add i32 %7, 1 - store i32 %inc5, i32* @c, align 4 - %8 = load i32, i32* @c, align 4 - %inc6 = add i32 %8, 1 - store i32 %inc6, i32* @c, align 4 - %9 = load i32, i32* @c, align 4 - %add = add i32 %9, 1 - store i32 %add, i32* %i, align 4 - %10 = load i32, i32* %i, align 4 - %sub = sub i32 %10, 1 - store i32 %sub, i32* %j, align 4 - %11 = load i32, i32* %i, align 4 - %add7 = add i32 %11, 2 - store i32 %add7, i32* %k, align 4 - call void @longjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*), i32 1) #3 - unreachable -} - -declare dso_local void @longjmp(%struct.__jmp_buf_tag*, i32) #1 - -; CHECK-LABEL: @main -; CHECK-NOT: main.cold.1 -define dso_local i32 @main() #0 { -entry: - %retval = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - store i32 0, i32* %i, align 4 - %call = call i32 @_setjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to 
%struct.__jmp_buf_tag*)) #4 - %tobool = icmp ne i32 %call, 0 - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - store i32 1, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - call void @f() - store i32 0, i32* %retval, align 4 - br label %return - -return: ; preds = %if.end, %if.then - %0 = load i32, i32* %retval, align 4 - ret i32 %0 -} - -declare dso_local i32 @_setjmp(%struct.__jmp_buf_tag*) #2 - -attributes #0 = { nounwind uwtable } -attributes #1 = { noreturn nounwind } -attributes #2 = { nounwind returns_twice } -attributes #3 = { noreturn nounwind } -attributes #4 = { nounwind returns_twice } diff --git a/llvm/test/Transforms/HotColdSplit/longjmp-split.ll b/llvm/test/Transforms/HotColdSplit/longjmp-split.ll deleted file mode 100644 index 905f146a79e18..0000000000000 --- a/llvm/test/Transforms/HotColdSplit/longjmp-split.ll +++ /dev/null @@ -1,132 +0,0 @@ -; RUN: opt -profile-summary-cold-count=0 -hotcoldsplit -S < %s | FileCheck %s - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct.__jmp_buf_tag = type { [8 x i64], i32, %struct.__sigset_t } -%struct.__sigset_t = type { [16 x i64] } - -@c = dso_local global i32 1, align 4 -@buf = dso_local global [20 x i8*] zeroinitializer, align 16 - -; CHECK-LABEL: @f -; CHECK: f.cold.1 -define dso_local void @f() #0 !prof !31 { -entry: - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - %0 = load i32, i32* @c, align 4 - %tobool = icmp ne i32 %0, 0 - br i1 %tobool, label %if.then, label %if.else, !prof !32 - -if.then: ; preds = %entry - ret void - -if.else: ; preds = %entry - %1 = load i32, i32* @c, align 4 - %inc = add i32 %1, 1 - store i32 %inc, i32* @c, align 4 - %2 = load i32, i32* @c, align 4 - %inc1 = add i32 %2, 1 - store i32 %inc1, i32* @c, align 4 - %3 = load i32, i32* @c, align 4 - %inc2 = add i32 %3, 1 - store i32 %inc2, i32* @c, align 4 - %4 = load i32, i32* @c, align 4 - %inc3 = add i32 %4, 1 - store i32 %inc3, i32* @c, align 4 - %5 = load i32, i32* @c, align 4 - %dec = add i32 %5, -1 - store i32 %dec, i32* @c, align 4 - %6 = load i32, i32* @c, align 4 - %dec4 = add i32 %6, -1 - store i32 %dec4, i32* @c, align 4 - %7 = load i32, i32* @c, align 4 - %inc5 = add i32 %7, 1 - store i32 %inc5, i32* @c, align 4 - %8 = load i32, i32* @c, align 4 - %inc6 = add i32 %8, 1 - store i32 %inc6, i32* @c, align 4 - %9 = load i32, i32* @c, align 4 - %add = add i32 %9, 1 - store i32 %add, i32* %i, align 4 - %10 = load i32, i32* %i, align 4 - %sub = sub i32 %10, 1 - store i32 %sub, i32* %j, align 4 - %11 = load i32, i32* %i, align 4 - %add7 = add i32 %11, 2 - store i32 %add7, i32* %k, align 4 - call void @longjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*), i32 1) #3 - unreachable -} - -declare dso_local void @longjmp(%struct.__jmp_buf_tag*, i32) #1 - -define dso_local i32 @main() #0 !prof !31 { -entry: - %retval = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - store i32 0, i32* %i, align 4 - %call = call i32 @_setjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*)) #4 - %tobool = icmp ne i32 %call, 0 - br i1 %tobool, label %if.then, label %if.end, !prof !33 - -if.then: ; preds = %entry - store i32 1, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - call void @f() - store i32 0, i32* %retval, align 4 - br label %return - -return: ; preds = %if.end, %if.then - %0 
= load i32, i32* %retval, align 4 - ret i32 %0 -} - -declare dso_local i32 @_setjmp(%struct.__jmp_buf_tag*) #2 - -attributes #0 = { inlinehint nounwind uwtable } -attributes #1 = { noreturn nounwind } -attributes #2 = { nounwind returns_twice } -attributes #3 = { noreturn nounwind } -attributes #4 = { nounwind returns_twice } - -!llvm.module.flags = !{!0, !1} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 1, !"ProfileSummary", !2} -!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12} -!3 = !{!"ProfileFormat", !"InstrProf"} -!4 = !{!"TotalCount", i64 2} -!5 = !{!"MaxCount", i64 1} -!6 = !{!"MaxInternalCount", i64 1} -!7 = !{!"MaxFunctionCount", i64 1} -!8 = !{!"NumCounts", i64 4} -!9 = !{!"NumFunctions", i64 2} -!10 = !{!"IsPartialProfile", i64 0} -!11 = !{!"PartialProfileRatio", double 0.000000e+00} -!12 = !{!"DetailedSummary", !13} -!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29} -!14 = !{i32 10000, i64 0, i32 0} -!15 = !{i32 100000, i64 0, i32 0} -!16 = !{i32 200000, i64 0, i32 0} -!17 = !{i32 300000, i64 0, i32 0} -!18 = !{i32 400000, i64 0, i32 0} -!19 = !{i32 500000, i64 1, i32 2} -!20 = !{i32 600000, i64 1, i32 2} -!21 = !{i32 700000, i64 1, i32 2} -!22 = !{i32 800000, i64 1, i32 2} -!23 = !{i32 900000, i64 1, i32 2} -!24 = !{i32 950000, i64 1, i32 2} -!25 = !{i32 990000, i64 1, i32 2} -!26 = !{i32 999000, i64 1, i32 2} -!27 = !{i32 999900, i64 1, i32 2} -!28 = !{i32 999990, i64 1, i32 2} -!29 = !{i32 999999, i64 1, i32 2} -!31 = !{!"function_entry_count", i64 1} -!32 = !{!"branch_weights", i32 1, i32 0} -!33 = !{!"branch_weights", i32 0, i32 1} diff --git a/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll b/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll deleted file mode 100644 index f02a1b376ecb2..0000000000000 --- a/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll +++ /dev/null @@ -1,103 +0,0 @@ -; RUN: opt -hotcoldsplit -S < %s | FileCheck %s -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@c = dso_local global i32 1, align 4 -@buf = dso_local global [20 x i8*] zeroinitializer, align 16 - -; CHECK-LABEL: @f -; CHECK-NOT: f.cold.1 -define dso_local void @f() #0 { -entry: - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - %0 = load i32, i32* @c, align 4 - %tobool = icmp ne i32 %0, 0 - br i1 %tobool, label %if.then, label %if.else - -if.then: ; preds = %entry - ret void - -if.else: ; preds = %entry - %1 = load i32, i32* @c, align 4 - %inc = add nsw i32 %1, 1 - store i32 %inc, i32* @c, align 4 - %2 = load i32, i32* @c, align 4 - %inc1 = add nsw i32 %2, 1 - store i32 %inc1, i32* @c, align 4 - %3 = load i32, i32* @c, align 4 - %inc2 = add nsw i32 %3, 1 - store i32 %inc2, i32* @c, align 4 - %4 = load i32, i32* @c, align 4 - %inc3 = add nsw i32 %4, 1 - store i32 %inc3, i32* @c, align 4 - %5 = load i32, i32* @c, align 4 - %dec = add nsw i32 %5, -1 - store i32 %dec, i32* @c, align 4 - %6 = load i32, i32* @c, align 4 - %dec4 = add nsw i32 %6, -1 - store i32 %dec4, i32* @c, align 4 - %7 = load i32, i32* @c, align 4 - %inc5 = add nsw i32 %7, 1 - store i32 %inc5, i32* @c, align 4 - %8 = load i32, i32* @c, align 4 - %inc6 = add nsw i32 %8, 1 - store i32 %inc6, i32* @c, align 4 - %9 = load i32, i32* @c, align 4 - %add = add nsw i32 %9, 1 - store i32 %add, i32* %i, align 4 - %10 = load i32, i32* %i, align 4 - %sub = sub nsw i32 %10, 1 - store i32 %sub, i32* %j, align 4 - %11 = load i32, i32* %i, align 4 - %add7 = add nsw i32 
%11, 2 - store i32 %add7, i32* %k, align 4 - call void @llvm.eh.sjlj.longjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) - unreachable -} - -declare void @llvm.eh.sjlj.longjmp(i8*) #1 - -; CHECK-LABEL: @main -; CHECK-NOT: main.cold.1 -define dso_local i32 @main() #0 { -entry: - %retval = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - store i32 0, i32* %i, align 4 - %0 = call i8* @llvm.frameaddress.p0i8(i32 0) - store i8* %0, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 0), align 16 - %1 = call i8* @llvm.stacksave() - store i8* %1, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 2), align 16 - %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) - %tobool = icmp ne i32 %2, 0 - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - store i32 1, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - call void @f() - store i32 0, i32* %retval, align 4 - br label %return - -return: ; preds = %if.end, %if.then - %3 = load i32, i32* %retval, align 4 - ret i32 %3 -} - -declare i8* @llvm.frameaddress.p0i8(i32 immarg) #2 - -declare i8* @llvm.stacksave() #3 - -declare i32 @llvm.eh.sjlj.setjmp(i8*) #3 - -attributes #0 = { nounwind uwtable } -attributes #1 = { noreturn nounwind } -attributes #2 = { nounwind readnone } -attributes #3 = { nounwind } - - diff --git a/llvm/test/Transforms/HotColdSplit/sjlj-split.ll b/llvm/test/Transforms/HotColdSplit/sjlj-split.ll deleted file mode 100644 index 3a12677ecf519..0000000000000 --- a/llvm/test/Transforms/HotColdSplit/sjlj-split.ll +++ /dev/null @@ -1,136 +0,0 @@ -; RUN: opt -profile-summary-cold-count=0 -hotcoldsplit -S < %s | FileCheck %s -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@c = dso_local global i32 1, align 4 -@buf = dso_local global [20 x i8*] zeroinitializer, align 16 - -; CHECK-LABEL: @f -; CHECK: f.cold.1 -define dso_local void @f() #0 !prof !31 { -entry: - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - %0 = load i32, i32* @c, align 4 - %tobool = icmp ne i32 %0, 0 - br i1 %tobool, label %if.then, label %if.else, !prof !32 - -if.then: ; preds = %entry - ret void - -if.else: ; preds = %entry - %1 = load i32, i32* @c, align 4 - %inc = add i32 %1, 1 - store i32 %inc, i32* @c, align 4 - %2 = load i32, i32* @c, align 4 - %inc1 = add i32 %2, 1 - store i32 %inc1, i32* @c, align 4 - %3 = load i32, i32* @c, align 4 - %inc2 = add i32 %3, 1 - store i32 %inc2, i32* @c, align 4 - %4 = load i32, i32* @c, align 4 - %inc3 = add i32 %4, 1 - store i32 %inc3, i32* @c, align 4 - %5 = load i32, i32* @c, align 4 - %dec = add i32 %5, -1 - store i32 %dec, i32* @c, align 4 - %6 = load i32, i32* @c, align 4 - %dec4 = add i32 %6, -1 - store i32 %dec4, i32* @c, align 4 - %7 = load i32, i32* @c, align 4 - %inc5 = add i32 %7, 1 - store i32 %inc5, i32* @c, align 4 - %8 = load i32, i32* @c, align 4 - %inc6 = add i32 %8, 1 - store i32 %inc6, i32* @c, align 4 - %9 = load i32, i32* @c, align 4 - %add = add i32 %9, 1 - store i32 %add, i32* %i, align 4 - %10 = load i32, i32* %i, align 4 - %sub = sub i32 %10, 1 - store i32 %sub, i32* %j, align 4 - %11 = load i32, i32* %i, align 4 - %add7 = add i32 %11, 2 - store i32 %add7, i32* %k, align 4 - call void @llvm.eh.sjlj.longjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) - unreachable -} - -declare void @llvm.eh.sjlj.longjmp(i8*) #1 - -define dso_local i32 @main() #0 !prof !31 { 
-entry: - %retval = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - store i32 0, i32* %i, align 4 - %0 = call i8* @llvm.frameaddress.p0i8(i32 0) - store i8* %0, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 0), align 16 - %1 = call i8* @llvm.stacksave() - store i8* %1, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 2), align 16 - %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) - %tobool = icmp ne i32 %2, 0 - br i1 %tobool, label %if.then, label %if.end, !prof !33 - -if.then: ; preds = %entry - store i32 1, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - call void @f() - store i32 0, i32* %retval, align 4 - br label %return - -return: ; preds = %if.end, %if.then - %3 = load i32, i32* %retval, align 4 - ret i32 %3 -} - -declare i8* @llvm.frameaddress.p0i8(i32 immarg) #2 - -declare i8* @llvm.stacksave() #3 - -declare i32 @llvm.eh.sjlj.setjmp(i8*) #3 - -attributes #0 = { inlinehint nounwind uwtable } -attributes #1 = { noreturn nounwind } -attributes #2 = { nounwind readnone } -attributes #3 = { nounwind } - -!llvm.module.flags = !{!0, !1} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 1, !"ProfileSummary", !2} -!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12} -!3 = !{!"ProfileFormat", !"InstrProf"} -!4 = !{!"TotalCount", i64 2} -!5 = !{!"MaxCount", i64 1} -!6 = !{!"MaxInternalCount", i64 1} -!7 = !{!"MaxFunctionCount", i64 1} -!8 = !{!"NumCounts", i64 4} -!9 = !{!"NumFunctions", i64 2} -!10 = !{!"IsPartialProfile", i64 0} -!11 = !{!"PartialProfileRatio", double 0.000000e+00} -!12 = !{!"DetailedSummary", !13} -!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29} -!14 = !{i32 10000, i64 0, i32 0} -!15 = !{i32 100000, i64 0, i32 0} -!16 = !{i32 200000, i64 0, i32 0} -!17 = !{i32 300000, i64 0, i32 0} -!18 = !{i32 400000, i64 0, i32 0} -!19 = !{i32 500000, i64 1, i32 2} -!20 = !{i32 600000, i64 1, i32 2} -!21 = !{i32 700000, i64 1, i32 2} -!22 = !{i32 800000, i64 1, i32 2} -!23 = !{i32 900000, i64 1, i32 2} -!24 = !{i32 950000, i64 1, i32 2} -!25 = !{i32 990000, i64 1, i32 2} -!26 = !{i32 999000, i64 1, i32 2} -!27 = !{i32 999900, i64 1, i32 2} -!28 = !{i32 999990, i64 1, i32 2} -!29 = !{i32 999999, i64 1, i32 2} -!31 = !{!"function_entry_count", i64 1} -!32 = !{!"branch_weights", i32 1, i32 0} -!33 = !{!"branch_weights", i32 0, i32 1} - diff --git a/llvm/test/Transforms/HotColdSplit/split-assert-fail.ll b/llvm/test/Transforms/HotColdSplit/split-assert-fail.ll deleted file mode 100644 index ea5f4b9114917..0000000000000 --- a/llvm/test/Transforms/HotColdSplit/split-assert-fail.ll +++ /dev/null @@ -1,47 +0,0 @@ -; REQUIRES: asserts -; RUN: opt -S -instsimplify -hotcoldsplit -debug < %s 2>&1 | FileCheck %s -; RUN: opt -instcombine -hotcoldsplit -instsimplify %s -o /dev/null - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@.str = private unnamed_addr constant [2 x i8] c"0\00", align 1 -@.str.1 = private unnamed_addr constant [14 x i8] c"assert-fail.c\00", align 1 -@__PRETTY_FUNCTION__.main = private unnamed_addr constant [15 x i8] c"int main(void)\00", align 1 - -; CHECK: @f -; CHECK-LABEL: codeRepl: -; CHECK } -; CHECK: define {{.*}}@f.cold.1() -; CHECK-LABEL: newFuncRoot: -; CHECK: br label %if.then - -; Function Attrs: nounwind willreturn -define i32 @f() #0 { -entry: - %retval = alloca i32, align 4 - %i = alloca i32, align 4 - store 
i32 0, i32* %retval, align 4
- store i32 0, i32* %i, align 4
- %0 = load i32, i32* %i, align 4
- %cmp = icmp eq i32 %0, 2
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- call void @__assert_fail(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 10, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__PRETTY_FUNCTION__.main, i64 0, i64 0)) #1
- unreachable
-
-if.end: ; preds = %entry
- %1 = load i32, i32* %i, align 4
- %add = add nsw i32 %1, 1
- store i32 %add, i32* %i, align 4
- %2 = load i32, i32* %i, align 4
- ret i32 %2
-}
-
-; Function Attrs: noreturn nounwind
-declare dso_local void @__assert_fail(i8*, i8*, i32, i8*) #1
-
-attributes #0 = { nounwind willreturn }
-attributes #1 = { noreturn nounwind }
-
From 27e1cc6f391b57c9e20344c8a6d77a57f697eb87 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Mon, 5 Oct 2020 14:47:46 -0700
Subject: [PATCH 080/321] Cleanup CodeGen/CallingConvLower.cpp

Patch by pi1024e (email unavailable)

Differential Revision: https://reviews.llvm.org/D82593
---
 llvm/lib/CodeGen/CallingConvLower.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index 9662a583e3694..9afaf95b5bb89 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -195,9 +195,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
 return true; // Assume -msse-regparm might be in effect.
 if (!VT.isInteger())
 return false;
- if (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall)
- return true;
- return false;
+ return (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall);
}

void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
@@ -213,8 +211,8 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,

 // Allocate something of this value type repeatedly until we get assigned a
 // location in memory.
- bool HaveRegParm = true;
- while (HaveRegParm) {
+ bool HaveRegParm;
+ do {
 if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
 dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
@@ -223,7 +221,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
 llvm_unreachable(nullptr);
 }
 HaveRegParm = Locs.back().isRegLoc();
- }
+ } while (HaveRegParm);

 // Copy all the registers from the value locations we added.
 assert(NumLocs < Locs.size() && "CC assignment failed to add location");
@@ -254,7 +252,7 @@ void CCState::analyzeMustTailForwardedRegisters(
 const TargetLowering *TL = MF.getSubtarget().getTargetLowering();
 const TargetRegisterClass *RC = TL->getRegClassFor(RegVT);
 for (MCPhysReg PReg : RemainingRegs) {
- unsigned VReg = MF.addLiveIn(PReg, RC);
+ Register VReg = MF.addLiveIn(PReg, RC);
 Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT));
 }
}
From 1127662c6dc2a276839c75a42238b11a3ad00f32 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 5 Oct 2020 14:27:14 -0700
Subject: [PATCH 081/321] [SelectionDAG] Make sure FMF are propagated when
 getSetcc canonicalizes FP constants to RHS.

getNode handling for ISD::SETCC calls FoldSETCC which can canonicalize FP
constants to the RHS. When this happens we should create the node with the
FMF that was requested. By using FlagInserter we can ensure any calls to
getNode/getSetcc during canonicalization will also get the flags.
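For readers unfamiliar with the pattern, here is a minimal standalone sketch of the RAII idea this change relies on; the types (`Flags`, `DAG`, `FlagScope`) are simplified stand-ins invented for illustration, not the actual SelectionDAG API. The point is that an RAII scope lets flags reach nodes created by nested helpers (such as a constant canonicalizer) without threading an extra parameter through every call:

```cpp
#include <cassert>

struct Flags { bool NoNaNs = false; }; // stand-in for SDNodeFlags

class DAG {
  const Flags *Current = nullptr; // innermost active inserter, if any

public:
  // RAII scope: nodes created while it is alive pick up Flags, including
  // nodes made indirectly by helper routines.
  class FlagScope {
    DAG &D;
    const Flags *Saved;
  public:
    FlagScope(DAG &Dag, const Flags &F) : D(Dag), Saved(Dag.Current) {
      D.Current = &F;
    }
    ~FlagScope() { D.Current = Saved; }
  };

  struct Node { Flags F; };

  // Every node-creation path consults the active scope exactly once.
  Node getNode() {
    Node N;
    if (Current)
      N.F = *Current;
    return N;
  }

  // Stands in for a canonicalizer that rebuilds the node with its
  // operands swapped; with the scope active, the rebuilt node still
  // carries the caller's flags.
  Node canonicalize() { return getNode(); }
};

int main() {
  DAG D;
  Flags FMF;
  FMF.NoNaNs = true;
  DAG::FlagScope Scope(D, FMF);
  assert(D.canonicalize().F.NoNaNs && "flags survive canonicalization");
  return 0;
}
```

The same reasoning explains why the patch below can drop the explicit flags parameter from `getSetCC`: once a flag inserter is active, the created node picks the flags up automatically.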
Differential Revision: https://reviews.llvm.org/D88063 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 12 +++++++----- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 8 ++++---- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 8 ++++---- .../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 ++- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/CodeGen/X86/fmf-propagation.ll | 10 +++++++++- 9 files changed, 30 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index f86d46da23ce0..4c0412315c19a 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -339,11 +339,13 @@ class SelectionDAG { FlagInserter *LastInserter; public: - FlagInserter(SelectionDAG &SDAG, SDNode *N) - : DAG(SDAG), Flags(N->getFlags()), + FlagInserter(SelectionDAG &SDAG, SDNodeFlags Flags) + : DAG(SDAG), Flags(Flags), LastInserter(SDAG.getFlagInserter()) { SDAG.setFlagInserter(this); } + FlagInserter(SelectionDAG &SDAG, SDNode *N) + : FlagInserter(SDAG, N->getFlags()) {} FlagInserter(const FlagInserter &) = delete; FlagInserter &operator=(const FlagInserter &) = delete; @@ -1083,8 +1085,8 @@ class SelectionDAG { /// Helper function to make it easier to build SetCC's if you just have an /// ISD::CondCode instead of an SDValue. SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, - ISD::CondCode Cond, SDNodeFlags Flags = SDNodeFlags(), - SDValue Chain = SDValue(), bool IsSignaling = false) { + ISD::CondCode Cond, SDValue Chain = SDValue(), + bool IsSignaling = false) { assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() && "Cannot compare scalars to vectors"); assert(LHS.getValueType().isVector() == VT.isVector() && @@ -1094,7 +1096,7 @@ class SelectionDAG { if (Chain) return getNode(IsSignaling ? ISD::STRICT_FSETCCS : ISD::STRICT_FSETCC, DL, {VT, MVT::Other}, {Chain, LHS, RHS, getCondCode(Cond)}); - return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond), Flags); + return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond)); } /// Helper function to make it easier to build Select's if you just have diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9df930a6e3ba9..f67a06f81c559 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7437,7 +7437,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // FIXME Can we handle multiple uses? Could we token factor the chain // results from the new/old setcc? SDValue SetCC = - DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, SDNodeFlags(), + DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS); CombineTo(N, SetCC); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ea44fb2e722a7..1ff7b5e974c13 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1754,15 +1754,15 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode( if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). 
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, SDNodeFlags(), Chain, + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, SDNodeFlags(), Chain, + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, SDNodeFlags(), Chain, + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, SDNodeFlags(), Chain, + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); } if (Chain) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 9ab5e3865efdf..d738ef9df7f17 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1793,18 +1793,18 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, // The following can be improved, but not that much. SDValue Tmp1, Tmp2, Tmp3, OutputChain; Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, ISD::SETOEQ, SDNodeFlags(), Chain, IsSignaling); + RHSHi, ISD::SETOEQ, Chain, IsSignaling); OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue(); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, - RHSLo, CCCode, SDNodeFlags(), OutputChain, IsSignaling); + RHSLo, CCCode, OutputChain, IsSignaling); OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue(); Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, - ISD::SETUNE, SDNodeFlags(), OutputChain, IsSignaling); + ISD::SETUNE, OutputChain, IsSignaling); OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue(); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, CCCode, SDNodeFlags(), OutputChain, IsSignaling); + RHSHi, CCCode, OutputChain, IsSignaling); OutputChain = Tmp2->getNumValues() > 1 ? 
Tmp2.getValue(1) : SDValue(); Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index eef6a314a21f6..a166b790bca66 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3097,10 +3097,11 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDNodeFlags Flags; Flags.copyFMF(*FPMO); + SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition, Flags)); + setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } // Check if the condition of the select has one use or two users that are both diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 80d92dbe886d9..c54f6996c007d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6426,7 +6426,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, SDValue Sel; if (Node->isStrictFPOpcode()) { - Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, SDNodeFlags(), + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Node->getOperand(0), /*IsSignaling*/ true); Chain = Sel.getValue(1); } else { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0efb03589ef60..435cc80dd68e6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8287,7 +8287,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, EVT DstSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, - SDNodeFlags(), Chain, true); + Chain, true); Chain = Sel.getValue(1); SDValue FltOfs = DAG.getSelect( diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8638db813360f..781e932dc5079 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20457,7 +20457,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, *DAG.getContext(), TheVT); SDValue Cmp; if (IsStrict) { - Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT, SDNodeFlags(), + Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT, Chain, /*IsSignaling*/ true); Chain = Cmp.getValue(1); } else { diff --git a/llvm/test/CodeGen/X86/fmf-propagation.ll b/llvm/test/CodeGen/X86/fmf-propagation.ll index 8d16e1acda770..22ecdc1cf30b6 100644 --- a/llvm/test/CodeGen/X86/fmf-propagation.ll +++ b/llvm/test/CodeGen/X86/fmf-propagation.ll @@ -28,7 +28,7 @@ define float @fmf_transfer(float %x, float %y) { ret float %f8 } -; CHECK: Optimized type-legalized selection DAG: %bb.0 'fmf_setcc:' +; CHECK-LABEL: Optimized type-legalized selection DAG: %bb.0 'fmf_setcc:' ; CHECK: t13: i8 = setcc nnan ninf nsz arcp contract afn reassoc t2, ConstantFP:f32<0.000000e+00>, setlt:ch define float @fmf_setcc(float %x, float %y) { @@ -36,3 +36,11 @@ define float @fmf_setcc(float %x, float %y) { %ret = select i1 %cmp, float %x, float %y ret float %ret } + +; CHECK-LABEL: Initial selection DAG: %bb.0 'fmf_setcc_canon:' +; CHECK: t14: i8 = setcc 
nnan ninf nsz arcp contract afn reassoc t2, ConstantFP:f32<0.000000e+00>, setgt:ch
+define float @fmf_setcc_canon(float %x, float %y) {
+  %cmp = fcmp fast ult float 0.0, %x
+  %ret = select i1 %cmp, float %x, float %y
+  ret float %ret
+}

From 4da4e7cb2092c80ccb5c6c7e013b0f103c14d3e0 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 5 Oct 2020 15:03:07 -0700
Subject: [PATCH 082/321] [X86] Remove X86ISD::LCMPXCHG8_SAVE_EBX_DAG and LCMPXCHG8B_SAVE_EBX pseudo instruction

This and its friend X86ISD::LCMPXCHG16_SAVE_RBX_DAG are used if we need
to avoid clobbering the frame pointer in EBX/RBX. EBX/RBX are only used
as a frame pointer in 64-bit mode. In 64-bit mode we don't use CMPXCHG8B
since we have a GR64 cmpxchg available. So we don't need special
handling for LCMPXCHG8B.

Split from D88808

Differential Revision: https://reviews.llvm.org/D88853
---
 llvm/lib/Target/X86/X86ExpandPseudo.cpp | 12 +++------
 llvm/lib/Target/X86/X86ISelLowering.cpp | 23 +++++------------
 llvm/lib/Target/X86/X86InstrCompiler.td | 34 ++++++++-----------------
 llvm/lib/Target/X86/X86InstrInfo.td     |  7 -----
 4 files changed, 19 insertions(+), 57 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 7a593b8ff7093..a5a1a4ff93e6f 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -334,7 +334,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     MBB.erase(MBBI);
     return true;
   }
-  case X86::LCMPXCHG8B_SAVE_EBX:
   case X86::LCMPXCHG16B_SAVE_RBX: {
     // Perform the following transformation.
     // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
@@ -345,21 +344,16 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     const MachineOperand &InArg = MBBI->getOperand(6);
     Register SaveRbx = MBBI->getOperand(7).getReg();

-    unsigned ActualInArg =
-        Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX;
     // Copy the input argument of the pseudo into the argument of the
     // actual instruction.
-    TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, InArg.getReg(),
-                     InArg.isKill());
+    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), InArg.isKill());
     // Create the actual instruction.
-    unsigned ActualOpc =
-        Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::LCMPXCHG8B : X86::LCMPXCHG16B;
-    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(ActualOpc));
+    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
     // Copy the operands related to the address.
     for (unsigned Idx = 1; Idx < 6; ++Idx)
       NewInstr->addOperand(MBBI->getOperand(Idx));
     // Finally, restore the value of RBX.
-    TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, SaveRbx,
+    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
                      /*SrcIsKill*/ true);

     // Delete the pseudo.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 781e932dc5079..bd80812d5b101 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30494,23 +30494,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
     if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
         (BasePtr == X86::RBX || BasePtr == X86::EBX)) {
-      // ISel prefers the LCMPXCHG64 variant.
-      // If that assert breaks, that means it is not the case anymore,
-      // and we need to teach LCMPXCHG8_SAVE_EBX_DAG how to save RBX,
-      // not just EBX.
This is a matter of accepting i64 input for that - // pseudo, and restoring into the register of the right wide - // in expand pseudo. Everything else should just work. - assert(((Regs64bit == (BasePtr == X86::RBX)) || BasePtr == X86::EBX) && - "Saving only half of the RBX"); - unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_SAVE_RBX_DAG - : X86ISD::LCMPXCHG8_SAVE_EBX_DAG; + assert(Regs64bit && "RBX/EBX base pointer only expected for i128 CAS"); SDValue RBXSave = DAG.getCopyFromReg(swapInH.getValue(0), dl, - Regs64bit ? X86::RBX : X86::EBX, + X86::RBX, HalfT, swapInH.getValue(1)); SDValue Ops[] = {/*Chain*/ RBXSave.getValue(1), N->getOperand(1), swapInL, RBXSave, /*Glue*/ RBXSave.getValue(2)}; - Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO); + Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG16_SAVE_RBX_DAG, dl, Tys, + Ops, T, MMO); } else { unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG; @@ -33780,12 +33772,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } case X86::LCMPXCHG16B: return BB; - case X86::LCMPXCHG8B_SAVE_EBX: case X86::LCMPXCHG16B_SAVE_RBX: { - unsigned BasePtr = - MI.getOpcode() == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX; - if (!BB->isLiveIn(BasePtr)) - BB->addLiveIn(BasePtr); + if (!BB->isLiveIn(X86::RBX)) + BB->addLiveIn(X86::RBX); return BB; } case X86::MWAITX: { diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 07e7b17230e1b..195ea8b1b1272 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -845,6 +845,12 @@ let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>; } +let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], + Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW] in { +defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", + X86cas16, i128mem>, REX_W; +} + // This pseudo must be used when the frame uses RBX as // the base pointer. Indeed, in such situation RBX is a reserved // register and the register allocator will ignore any use/def of @@ -852,38 +858,18 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>; // RBX that will happen when setting the arguments for the instrucion. // // Unlike the actual related instruction, we mark that this one -// defines EBX (instead of using EBX). +// defines RBX (instead of using RBX). // The rationale is that we will define RBX during the expansion of -// the pseudo. The argument feeding EBX is ebx_input. +// the pseudo. The argument feeding RBX is rbx_input. // -// The additional argument, $ebx_save, is a temporary register used to +// The additional argument, $rbx_save, is a temporary register used to // save the value of RBX across the actual instruction. // -// To make sure the register assigned to $ebx_save does not interfere with +// To make sure the register assigned to $rbx_save does not interfere with // the definition of the actual instruction, we use a definition $dst which // is tied to $rbx_save. That way, the live-range of $rbx_save spans across // the instruction and we are sure we will have a valid register to restore // the value of RBX. 
-let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX], - Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW], - isCodeGenOnly = 1, isPseudo = 1, Constraints = "$ebx_save = $dst", - usesCustomInserter = 1 in { -def LCMPXCHG8B_SAVE_EBX : - I<0, Pseudo, (outs GR32:$dst), - (ins i64mem:$ptr, GR32:$ebx_input, GR32:$ebx_save), - !strconcat("cmpxchg8b", "\t$ptr"), - [(set GR32:$dst, (X86cas8save_ebx addr:$ptr, GR32:$ebx_input, - GR32:$ebx_save))]>; -} - - -let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], - Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW] in { -defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", - X86cas16, i128mem>, REX_W; -} - -// Same as LCMPXCHG8B_SAVE_RBX but for the 16 Bytes variant. let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX], Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst", diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 0ac8cb9c27764..ada5c2ffdc0bb 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -70,9 +70,6 @@ def SDTX86wrpkru : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; -def SDTX86caspairSaveEbx8 : SDTypeProfile<1, 3, - [SDTCisVT<0, i32>, SDTCisPtrTy<1>, - SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; def SDTX86caspairSaveRbx16 : SDTypeProfile<1, 3, [SDTCisVT<0, i64>, SDTCisPtrTy<1>, SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; @@ -180,10 +177,6 @@ def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair, def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def X86cas8save_ebx : SDNode<"X86ISD::LCMPXCHG8_SAVE_EBX_DAG", - SDTX86caspairSaveEbx8, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue, - SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86cas16save_rbx : SDNode<"X86ISD::LCMPXCHG16_SAVE_RBX_DAG", SDTX86caspairSaveRbx16, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, From e426ae2bd5e04ca57f36bfb64a5a466a6797cea1 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 5 Oct 2020 22:54:22 +0100 Subject: [PATCH 083/321] [libcxx][dsl] Fix mutable default argument warning This is flagged by PyCharm and can cause subtle bugs. While changing this also re-sort the imports and add missing ones. 
Reviewed By: #libc, ldionne

Differential Revision: https://reviews.llvm.org/D88816
---
 libcxx/utils/libcxx/test/dsl.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py
index 95d23df7ece13..820a5b341d5de 100644
--- a/libcxx/utils/libcxx/test/dsl.py
+++ b/libcxx/utils/libcxx/test/dsl.py
@@ -6,15 +6,20 @@
 #
 #===----------------------------------------------------------------------===##

-import libcxx.test.format
-import lit
-import lit.util
 import os
 import pipes
 import platform
 import re
 import tempfile

+import libcxx.test.format
+import lit
+import lit.LitConfig
+import lit.Test
+import lit.TestRunner
+import lit.util
+
+
 def _memoize(f):
   cache = dict()
   def memoized(x):
@@ -83,7 +88,7 @@ def sourceBuilds(config, source):
     _executeScriptInternal(test, ['rm %t.exe'])
     return exitCode == 0

-def programOutput(config, program, args=[], testPrefix=''):
+def programOutput(config, program, args=None, testPrefix=''):
   """
   Compiles a program for the test target, run it on the test target and return
   the output.
@@ -92,6 +97,8 @@ def programOutput(config, program, args=None, testPrefix=''):
   execution of the program is done through the %{exec} substitution, which
   means that the program may be run on a remote host depending on what %{exec}
   does.
   """
+  if args is None:
+    args = []
   with _makeConfigTest(config, testPrefix=testPrefix) as test:
     with open(test.getSourcePath(), 'w') as source:
       source.write(program)

From 69f87400a85e13482c535365bb19272a15d054b9 Mon Sep 17 00:00:00 2001
From: Joachim Protze
Date: Tue, 6 Oct 2020 00:06:40 +0200
Subject: [PATCH 084/321] [OpenMP][Archer][Tests] NFC: fix spurious test failure

The test disables suppression and therefore sometimes triggers a known
false positive in the openmp runtime. The test should only verify that
the env var is handled as expected.
---
 openmp/tools/archer/tests/lit.cfg                           | 2 +-
 openmp/tools/archer/tests/parallel/parallel-nosuppression.c | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/openmp/tools/archer/tests/lit.cfg b/openmp/tools/archer/tests/lit.cfg
index f064127817d62..ba810d97c3c42 100644
--- a/openmp/tools/archer/tests/lit.cfg
+++ b/openmp/tools/archer/tests/lit.cfg
@@ -111,7 +111,7 @@ config.substitutions.append(("%clang-archer", config.test_c_compiler))
 config.substitutions.append(("%openmp_flags", config.test_openmp_flags))
 config.substitutions.append(("%archer_flags", config.archer_flags))
 config.substitutions.append(("%flags", config.test_flags))
-config.substitutions.append(("%nosuppression", "env TSAN_OPTIONS='ignore_noninstrumented_modules=0'"))
+config.substitutions.append(("%nosuppression", "env TSAN_OPTIONS='ignore_noninstrumented_modules=0:exitcode=0'"))
 config.substitutions.append(("%suppression", "env TSAN_OPTIONS='ignore_noninstrumented_modules=0:ignore_noninstrumented_modules=1'"))
 config.substitutions.append(("%deflake", os.path.join(os.path.dirname(__file__), "deflake.bash")))

diff --git a/openmp/tools/archer/tests/parallel/parallel-nosuppression.c b/openmp/tools/archer/tests/parallel/parallel-nosuppression.c
index f0e1cd8b5e468..de46ace01dbbc 100644
--- a/openmp/tools/archer/tests/parallel/parallel-nosuppression.c
+++ b/openmp/tools/archer/tests/parallel/parallel-nosuppression.c
@@ -34,7 +34,5 @@ int main(int argc, char *argv[]) {
   return error;
 }

-// CHECK-NOT: ThreadSanitizer: data race
-// CHECK-NOT: ThreadSanitizer: reported
 // CHECK: Warning: please export TSAN_OPTIONS
 // CHECK: DONE

From a4b842e29411da2d08fed0e99918a7cf089bb84d Mon Sep 17 00:00:00 2001
From: Greg Clayton
Date: Fri, 2 Oct 2020 18:05:10 -0700
Subject: [PATCH 085/321] Show register names in DWARF unwind info.

Register context information was already being passed into the
DWARFDebugFrame code that dumps unwind information, but it wasn't being
used. This change adds the ability to dump register names if a valid MC
register context was passed in and if it knows about the register.

Updated the tests to use the newly returned register names.
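The lookup this adds is a two-step chain in which either step can fail: the
DWARF/EH register number is first mapped to LLVM's canonical register number,
and that number is then mapped to a printable name. The following standalone
sketch shows the same fallback logic; the function and variable names here
are illustrative, not part of the patch:

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCRegisterInfo.h"
#include <string>

// Resolve a DWARF/EH register number to a name, falling back to the raw
// "regN" spelling when no register context was provided or the number is
// unknown to it.
static std::string resolveRegName(const llvm::MCRegisterInfo *MRI, bool IsEH,
                                  unsigned RegNum) {
  if (MRI)
    if (llvm::Optional<unsigned> LLVMReg = MRI->getLLVMRegNum(RegNum, IsEH))
      if (const char *Name = MRI->getName(*LLVMReg))
        return Name; // e.g. "RBP" instead of "reg6"
  return ("reg" + llvm::Twine(RegNum)).str();
}

This fallback is why the FileCheck updates below can change expectations such
as "reg6" to "RBP": the dump path now makes use of the register context it
was already being given.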
Differential Revision: https://reviews.llvm.org/D88767 --- llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 16 +++++++++++++++- llvm/test/DebugInfo/RISCV/eh-frame.s | 2 +- .../DebugInfo/dwarfdump-debug-frame-simple.test | 8 ++++---- llvm/test/MC/ELF/cfi-restore-extended.s | 3 +-- llvm/test/MC/Mips/eh-frame.s | 4 ++-- llvm/test/MC/X86/i386-darwin-frame-register.ll | 2 +- 6 files changed, 24 insertions(+), 11 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index ba7449baaf7f0..5a05baca33369 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataExtractor.h" @@ -29,6 +30,18 @@ using namespace llvm; using namespace dwarf; +static void printRegister(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, + unsigned RegNum) { + if (MRI) { + if (Optional LLVMRegNum = MRI->getLLVMRegNum(RegNum, IsEH)) { + if (const char *RegName = MRI->getName(*LLVMRegNum)) { + OS << RegName; + return; + } + } + } + OS << "reg" << RegNum; +} // See DWARF standard v3, section 7.23 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; @@ -268,7 +281,8 @@ void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI, OS << format(" %" PRId64 "*data_alignment_factor" , Operand); break; case OT_Register: - OS << format(" reg%" PRId64, Operand); + OS << ' '; + printRegister(OS, MRI, IsEH, Operand); break; case OT_Expression: assert(Instr.Expression && "missing DWARFExpression object"); diff --git a/llvm/test/DebugInfo/RISCV/eh-frame.s b/llvm/test/DebugInfo/RISCV/eh-frame.s index f518d94c2baf8..8bc3ae0dfe435 100644 --- a/llvm/test/DebugInfo/RISCV/eh-frame.s +++ b/llvm/test/DebugInfo/RISCV/eh-frame.s @@ -26,7 +26,7 @@ func: # FDECFIEncoding and should be DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b). 
# CHECK: Augmentation data: 1B -# CHECK: DW_CFA_def_cfa: reg2 +0 +# CHECK: DW_CFA_def_cfa: X2 +0 # # CHECK: 00000014 00000010 00000018 FDE cie=00000000 pc=00000000...00000004 # CHECK: DW_CFA_nop: diff --git a/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test b/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test index 7193abc6cc03a..34fc1f8c25e2c 100644 --- a/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test +++ b/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test @@ -5,8 +5,8 @@ ; FRAMES: 00000000 00000010 ffffffff CIE ; FRAMES: Version: 1 -; FRAMES: DW_CFA_def_cfa: reg4 +4 -; FRAMES-NEXT: DW_CFA_offset: reg8 -4 +; FRAMES: DW_CFA_def_cfa: ESP +4 +; FRAMES-NEXT: DW_CFA_offset: EIP -4 ; FRAMES-NEXT: DW_CFA_nop: ; FRAMES-NEXT: DW_CFA_nop: @@ -18,9 +18,9 @@ ; FRAMES: 00000028 00000014 00000000 FDE cie=00000000 pc=00000030...00000080 ; FRAMES: DW_CFA_advance_loc: 1 ; FRAMES-NEXT: DW_CFA_def_cfa_offset: +8 -; FRAMES-NEXT: DW_CFA_offset: reg5 -8 +; FRAMES-NEXT: DW_CFA_offset: EBP -8 ; FRAMES-NEXT: DW_CFA_advance_loc: 2 -; FRAMES-NEXT: DW_CFA_def_cfa_register: reg5 +; FRAMES-NEXT: DW_CFA_def_cfa_register: EBP ; FRAMES-NOT: CIE ; FRAMES-NOT: FDE diff --git a/llvm/test/MC/ELF/cfi-restore-extended.s b/llvm/test/MC/ELF/cfi-restore-extended.s index e7371089934d9..bcfcf782a786a 100644 --- a/llvm/test/MC/ELF/cfi-restore-extended.s +++ b/llvm/test/MC/ELF/cfi-restore-extended.s @@ -6,7 +6,7 @@ f: nop // CHECK: DW_CFA_advance_loc: 1 .cfi_restore %rbp -// CHECK-NEXT: DW_CFA_restore: reg6 +// CHECK-NEXT: DW_CFA_restore: RBP nop // CHECK-NEXT: DW_CFA_advance_loc: 1 .cfi_restore 89 @@ -14,4 +14,3 @@ f: // CHECK-NEXT: DW_CFA_nop: nop .cfi_endproc - diff --git a/llvm/test/MC/Mips/eh-frame.s b/llvm/test/MC/Mips/eh-frame.s index 024b9e6ac4889..fd145317bf4d6 100644 --- a/llvm/test/MC/Mips/eh-frame.s +++ b/llvm/test/MC/Mips/eh-frame.s @@ -66,7 +66,7 @@ func: // DWARF32_PIC-NEXT: Augmentation data: 1B // ^^ fde pointer encoding: DW_EH_PE_pcrel | DW_EH_PE_sdata4 // DWARF32-EMPTY: -// DWARF32-NEXT: DW_CFA_def_cfa_register: reg29 +// DWARF32-NEXT: DW_CFA_def_cfa_register: SP_64 // // DWARF32_ABS: 00000014 00000010 00000018 FDE cie=00000000 pc=00000000...00000000 // DWARF32_PIC: 00000014 00000010 00000018 FDE cie=00000000 pc=0000001c...0000001c @@ -90,7 +90,7 @@ func: // DWARF64_PIC: Augmentation data: 1B // ^^ fde pointer encoding: DW_EH_PE_pcrel | DW_EH_PE_sdata4 // DWARF64-EMPTY: -// DWARF64-NEXT: DW_CFA_def_cfa_register: reg29 +// DWARF64-NEXT: DW_CFA_def_cfa_register: SP_64 // DWARF64_PIC-NEXT: DW_CFA_nop: // // DWARF64_ABS: 00000014 00000018 00000018 FDE cie=00000000 pc=00000000...00000000 diff --git a/llvm/test/MC/X86/i386-darwin-frame-register.ll b/llvm/test/MC/X86/i386-darwin-frame-register.ll index f4eb110cb08d7..1fb8ae989c7dd 100644 --- a/llvm/test/MC/X86/i386-darwin-frame-register.ll +++ b/llvm/test/MC/X86/i386-darwin-frame-register.ll @@ -11,7 +11,7 @@ ; CHECK: .debug_frame contents: ; CHECK: ffffffff CIE ; CHECK-NOT: {{CIE|FDE}} -; CHECK: DW_CFA_def_cfa: reg4 +4 +; CHECK: DW_CFA_def_cfa: ESP +4 ; ModuleID = 'foo.c' target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" From be66987e2047636d9ed9d2a4d88b762d59ae88f2 Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Fri, 2 Oct 2020 12:43:24 -0700 Subject: [PATCH 086/321] Fix raciness in the StopHook check for "has the target run". 
This was looking at the privateState, but it's possible that the actual process has started up and then stopped again by the time we get to the check, which would lead us to get out of running the stop hooks too early. Instead we need to track the intention of the stop hooks directly. Differential Revision: https://reviews.llvm.org/D88753 --- lldb/include/lldb/Target/Target.h | 20 +++- lldb/source/Target/Process.cpp | 3 +- lldb/source/Target/Target.cpp | 104 ++++++++++++------ .../target/stop-hooks/TestStopHookScripted.py | 2 - 4 files changed, 86 insertions(+), 43 deletions(-) diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 94c6ebeac10da..7ee27a9776d5c 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -1145,6 +1145,11 @@ class Target : public std::enable_shared_from_this, virtual ~StopHook() = default; enum class StopHookKind : uint32_t { CommandBased = 0, ScriptBased }; + enum class StopHookResult : uint32_t { + KeepStopped = 0, + RequestContinue, + AlreadyContinued + }; lldb::TargetSP &GetTarget() { return m_target_sp; } @@ -1160,8 +1165,8 @@ class Target : public std::enable_shared_from_this, // with a reason" thread. It should add to the stream whatever text it // wants to show the user, and return False to indicate it wants the target // not to stop. - virtual bool HandleStop(ExecutionContext &exe_ctx, - lldb::StreamSP output) = 0; + virtual StopHookResult HandleStop(ExecutionContext &exe_ctx, + lldb::StreamSP output) = 0; // Set the Thread Specifier. The stop hook will own the thread specifier, // and is responsible for deleting it when we're done. @@ -1201,8 +1206,8 @@ class Target : public std::enable_shared_from_this, void SetActionFromString(const std::string &strings); void SetActionFromStrings(const std::vector &strings); - bool HandleStop(ExecutionContext &exc_ctx, - lldb::StreamSP output_sp) override; + StopHookResult HandleStop(ExecutionContext &exc_ctx, + lldb::StreamSP output_sp) override; void GetSubclassDescription(Stream *s, lldb::DescriptionLevel level) const override; @@ -1219,7 +1224,8 @@ class Target : public std::enable_shared_from_this, class StopHookScripted : public StopHook { public: virtual ~StopHookScripted() = default; - bool HandleStop(ExecutionContext &exc_ctx, lldb::StreamSP output) override; + StopHookResult HandleStop(ExecutionContext &exc_ctx, + lldb::StreamSP output) override; Status SetScriptCallback(std::string class_name, StructuredData::ObjectSP extra_args_sp); @@ -1254,7 +1260,9 @@ class Target : public std::enable_shared_from_this, /// remove the stop hook, as it will also reset the stop hook counter. void UndoCreateStopHook(lldb::user_id_t uid); - void RunStopHooks(); + // Runs the stop hooks that have been registered for this target. + // Returns true if the stop hooks cause the target to resume. + bool RunStopHooks(); size_t GetStopHookSize(); diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index c602511daedc3..490ca45bfee22 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -4178,8 +4178,7 @@ void Process::ProcessEventData::DoOnRemoval(Event *event_ptr) { // public (or SyncResume) broadcasters. StopHooks are just for // real public stops. They might also restart the target, // so watch for that. 
- process_sp->GetTarget().RunStopHooks(); - if (process_sp->GetPrivateState() == eStateRunning) + if (process_sp->GetTarget().RunStopHooks()) SetRestarted(true); } } diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index a5250ddcef741..49af6c297cbcb 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -2541,25 +2541,26 @@ void Target::SetAllStopHooksActiveState(bool active_state) { } } -void Target::RunStopHooks() { +bool Target::RunStopHooks() { if (m_suppress_stop_hooks) - return; + return false; if (!m_process_sp) - return; + return false; // Somebody might have restarted the process: + // Still return false, the return value is about US restarting the target. if (m_process_sp->GetState() != eStateStopped) - return; + return false; // make sure we check that we are not stopped // because of us running a user expression since in that case we do not want // to run the stop-hooks if (m_process_sp->GetModIDRef().IsLastResumeForUserExpression()) - return; + return false; if (m_stop_hooks.empty()) - return; + return false; // If there aren't any active stop hooks, don't bother either. bool any_active_hooks = false; @@ -2570,7 +2571,7 @@ void Target::RunStopHooks() { } } if (!any_active_hooks) - return; + return false; std::vector exc_ctx_with_reasons; @@ -2588,7 +2589,7 @@ void Target::RunStopHooks() { // If no threads stopped for a reason, don't run the stop-hooks. size_t num_exe_ctx = exc_ctx_with_reasons.size(); if (num_exe_ctx == 0) - return; + return false; StreamSP output_sp = m_debugger.GetAsyncOutputStream(); @@ -2636,22 +2637,27 @@ void Target::RunStopHooks() { output_sp->Printf("-- Thread %d\n", exc_ctx.GetThreadPtr()->GetIndexID()); - bool this_should_stop = cur_hook_sp->HandleStop(exc_ctx, output_sp); - // If this hook is set to auto-continue that should override the - // HandleStop result... - if (cur_hook_sp->GetAutoContinue()) - this_should_stop = false; + StopHook::StopHookResult this_result = + cur_hook_sp->HandleStop(exc_ctx, output_sp); + bool this_should_stop = true; - // If anybody wanted to stop, we should all stop. - if (!should_stop) - should_stop = this_should_stop; + switch (this_result) { + case StopHook::StopHookResult::KeepStopped: + // If this hook is set to auto-continue that should override the + // HandleStop result... + if (cur_hook_sp->GetAutoContinue()) + this_should_stop = false; + else + this_should_stop = true; - // We don't have a good way to prohibit people from restarting the target - // willy nilly in a stop hook. So see if the private state is running - // here and bag out if it is. - // FIXME: when we are doing non-stop mode for realz we'll have to instead - // track each thread, and only bag out if a thread is set running. - if (m_process_sp->GetPrivateState() != eStateStopped) { + break; + case StopHook::StopHookResult::RequestContinue: + this_should_stop = false; + break; + case StopHook::StopHookResult::AlreadyContinued: + // We don't have a good way to prohibit people from restarting the + // target willy nilly in a stop hook. If the hook did so, give a + // gentle suggestion here and bag out if the hook processing. output_sp->Printf("\nAborting stop hooks, hook %" PRIu64 " set the program running.\n" " Consider using '-G true' to make " @@ -2660,16 +2666,42 @@ void Target::RunStopHooks() { somebody_restarted = true; break; } + // If we're already restarted, stop processing stop hooks. 
+ // FIXME: if we are doing non-stop mode for real, we would have to + // check that OUR thread was restarted, otherwise we should keep + // processing stop hooks. + if (somebody_restarted) + break; + + // If anybody wanted to stop, we should all stop. + if (!should_stop) + should_stop = this_should_stop; } } output_sp->Flush(); + // If one of the commands in the stop hook already restarted the target, + // report that fact. + if (somebody_restarted) + return true; + // Finally, if auto-continue was requested, do it now: // We only compute should_stop against the hook results if a hook got to run // which is why we have to do this conjoint test. - if (!somebody_restarted && ((hooks_ran && !should_stop) || auto_continue)) - m_process_sp->PrivateResume(); + if ((hooks_ran && !should_stop) || auto_continue) { + Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); + Status error = m_process_sp->PrivateResume(); + if (error.Success()) { + LLDB_LOG(log, "Resuming from RunStopHooks"); + return true; + } else { + LLDB_LOG(log, "Resuming from RunStopHooks failed: {0}", error); + return false; + } + } + + return false; } const TargetPropertiesSP &Target::GetGlobalProperties() { @@ -3235,13 +3267,14 @@ void Target::StopHookCommandLine::SetActionFromStrings( GetCommands().AppendString(string.c_str()); } -bool Target::StopHookCommandLine::HandleStop(ExecutionContext &exc_ctx, - StreamSP output_sp) { +Target::StopHook::StopHookResult +Target::StopHookCommandLine::HandleStop(ExecutionContext &exc_ctx, + StreamSP output_sp) { assert(exc_ctx.GetTargetPtr() && "Can't call PerformAction on a context " "with no target"); if (!m_commands.GetSize()) - return true; + return StopHookResult::KeepStopped; CommandReturnObject result(false); result.SetImmediateOutputStream(output_sp); @@ -3260,8 +3293,11 @@ bool Target::StopHookCommandLine::HandleStop(ExecutionContext &exc_ctx, debugger.GetCommandInterpreter().HandleCommands(GetCommands(), &exc_ctx, options, result); debugger.SetAsyncExecution(old_async); - - return true; + lldb::ReturnStatus status = result.GetStatus(); + if (status == eReturnStatusSuccessContinuingNoResult || + status == eReturnStatusSuccessContinuingResult) + return StopHookResult::AlreadyContinued; + return StopHookResult::KeepStopped; } // Target::StopHookScripted @@ -3289,20 +3325,22 @@ Status Target::StopHookScripted::SetScriptCallback( return error; } -bool Target::StopHookScripted::HandleStop(ExecutionContext &exc_ctx, - StreamSP output_sp) { +Target::StopHook::StopHookResult +Target::StopHookScripted::HandleStop(ExecutionContext &exc_ctx, + StreamSP output_sp) { assert(exc_ctx.GetTargetPtr() && "Can't call HandleStop on a context " "with no target"); ScriptInterpreter *script_interp = GetTarget()->GetDebugger().GetScriptInterpreter(); if (!script_interp) - return true; + return StopHookResult::KeepStopped; bool should_stop = script_interp->ScriptedStopHookHandleStop( m_implementation_sp, exc_ctx, output_sp); - return should_stop; + return should_stop ? 
StopHookResult::KeepStopped + : StopHookResult::RequestContinue; } void Target::StopHookScripted::GetSubclassDescription( diff --git a/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py b/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py index 014890e0d973b..7ef5a72b9f6fa 100644 --- a/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py +++ b/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py @@ -71,8 +71,6 @@ def test_stop_hooks_scripted_return_false(self): """Test that the returning False from a stop hook works""" self.do_test_auto_continue(True) - # Test is flakey on Linux. - @skipIfLinux def do_test_auto_continue(self, return_true): """Test that auto-continue works.""" # We set auto-continue to 1 but the stop hook only applies to step_out_of_me, From 2ea8fec2cd566be77c9412aacb8e546bd0c36612 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 5 Oct 2020 18:50:37 -0400 Subject: [PATCH 087/321] [libc++] Improve tests for std::quoted Instead of using ad-hoc mechanisms to disable the tests in C++ < 14, use UNSUPPORTED markup. --- .../quoted.manip/quoted.pass.cpp | 12 +++++----- ...ompile.fail.cpp => quoted_char.verify.cpp} | 20 ++++++---------- ...pile.fail.cpp => quoted_traits.verify.cpp} | 23 +++++++------------ 3 files changed, 21 insertions(+), 34 deletions(-) rename libcxx/test/std/input.output/iostream.format/quoted.manip/{quoted_char.compile.fail.cpp => quoted_char.verify.cpp} (72%) rename libcxx/test/std/input.output/iostream.format/quoted.manip/{quoted_traits.compile.fail.cpp => quoted_traits.verify.cpp} (77%) diff --git a/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted.pass.cpp b/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted.pass.cpp index 6ad0ec11808c5..854e271fcabb5 100644 --- a/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted.pass.cpp @@ -22,7 +22,7 @@ template bool is_skipws ( const std::basic_istream& is ) { return ( is.flags() & std::ios_base::skipws ) != 0; - } +} template > void both_ways ( const CharT *p ) { @@ -34,7 +34,7 @@ void both_ways ( const CharT *p ) { ((void)skippingws); // Prevent unused warning ss << q; ss >> q; - } +} template > void round_trip ( const CharT *p ) { @@ -46,7 +46,7 @@ void round_trip ( const CharT *p ) { ss >> std::quoted(s); assert ( s == p ); assert ( skippingws == is_skipws ( ss )); - } +} template > @@ -60,7 +60,7 @@ void round_trip_ws ( const CharT *p ) { ss >> std::quoted(s); assert ( s == p ); assert ( skippingws == is_skipws ( ss )); - } +} template > void round_trip_d ( const CharT *p, char delim ) { @@ -71,7 +71,7 @@ void round_trip_d ( const CharT *p, char delim ) { std::basic_string s; ss >> std::quoted(s, d); assert ( s == p ); - } +} template > void round_trip_e ( const CharT *p, char escape ) { @@ -82,7 +82,7 @@ void round_trip_e ( const CharT *p, char escape ) { std::basic_string s; ss >> std::quoted(s, CharT('"'), e ); assert ( s == p ); - } +} template > diff --git a/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_char.compile.fail.cpp b/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_char.verify.cpp similarity index 72% rename from libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_char.compile.fail.cpp rename to libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_char.verify.cpp index 4b343013e2583..9340edbf80a31 100644 --- 
a/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_char.compile.fail.cpp +++ b/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_char.verify.cpp @@ -10,6 +10,8 @@ // quoted +// UNSUPPORTED: c++03, c++11 + #include #include #include @@ -19,21 +21,13 @@ // Test that mismatches between strings and wide streams are diagnosed -#if TEST_STD_VER > 11 - void round_trip ( const char *p ) { std::wstringstream ss; - ss << std::quoted(p); + ss << std::quoted(p); // expected-error {{invalid operands to binary expression}} std::string s; - ss >> std::quoted(s); - } - - + ss >> std::quoted(s); // expected-error {{invalid operands to binary expression}} +} -int main(int, char**) -{ - round_trip ( "Hi Mom" ); +int main(int, char**) { + round_trip("Hi Mom"); } -#else -#error -#endif diff --git a/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_traits.compile.fail.cpp b/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_traits.verify.cpp similarity index 77% rename from libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_traits.compile.fail.cpp rename to libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_traits.verify.cpp index b19eea376d825..5a92485ced56f 100644 --- a/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_traits.compile.fail.cpp +++ b/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_traits.verify.cpp @@ -10,6 +10,8 @@ // quoted +// UNSUPPORTED: c++03, c++11 + #include #include #include @@ -17,29 +19,20 @@ #include "test_macros.h" -#if TEST_STD_VER > 11 - // Test that mismatches in the traits between the quoted object and the dest string are diagnosed. template -struct test_traits -{ - typedef charT char_type; +struct test_traits { + typedef charT char_type; }; void round_trip ( const char *p ) { std::stringstream ss; ss << std::quoted(p); std::basic_string> s; - ss >> std::quoted(s); - } - - + ss >> std::quoted(s); // expected-error {{invalid operands to binary expression}} +} -int main(int, char**) -{ - round_trip ( "Hi Mom" ); +int main(int, char**) { + round_trip("Hi Mom"); } -#else -#error -#endif From b268e24d4349d586334dd6f47aa99b2196b1046e Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 23 Sep 2020 21:58:45 -0700 Subject: [PATCH 088/321] [NFC][regalloc] Separate iteration from AllocationOrder This separates the two concerns - encapsulation of traversal order; and iteration. Differential Revision: https://reviews.llvm.org/D88256 --- llvm/lib/CodeGen/AllocationOrder.h | 89 +++++++++++++------ llvm/lib/CodeGen/RegAllocBasic.cpp | 3 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 45 ++++++---- .../unittests/CodeGen/AllocationOrderTest.cpp | 34 +++---- 4 files changed, 107 insertions(+), 64 deletions(-) diff --git a/llvm/lib/CodeGen/AllocationOrder.h b/llvm/lib/CodeGen/AllocationOrder.h index 368a3cd81d4c5..24ffee510a0f2 100644 --- a/llvm/lib/CodeGen/AllocationOrder.h +++ b/llvm/lib/CodeGen/AllocationOrder.h @@ -17,8 +17,8 @@ #define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCRegister.h" namespace llvm { @@ -30,12 +30,52 @@ class LiveRegMatrix; class LLVM_LIBRARY_VISIBILITY AllocationOrder { const SmallVector Hints; ArrayRef Order; - int Pos = 0; - - // If HardHints is true, *only* Hints will be returned. - const bool HardHints; + // How far into the Order we can iterate. 
This is 0 if the AllocationOrder is + // constructed with HardHints = true, Order.size() otherwise. While + // technically a size_t, it will participate in comparisons with the + // Iterator's Pos, which must be signed, so it's typed here as signed, too, to + // avoid warnings and under the assumption that the size of Order is + // relatively small. + // IterationLimit defines an invalid iterator position. + const int IterationLimit; public: + /// Forward iterator for an AllocationOrder. + class Iterator final { + const AllocationOrder &AO; + int Pos = 0; + + public: + Iterator(const AllocationOrder &AO, int Pos) : AO(AO), Pos(Pos) {} + + /// Return true if the curent position is that of a preferred register. + bool isHint() const { return Pos < 0; } + + /// Return the next physical register in the allocation order. + MCRegister operator*() const { + if (Pos < 0) + return AO.Hints.end()[Pos]; + assert(Pos < AO.IterationLimit); + return AO.Order[Pos]; + } + + /// Advance the iterator to the next position. If that's past the Hints + /// list, advance to the first value that's not also in the Hints list. + Iterator &operator++() { + if (Pos < AO.IterationLimit) + ++Pos; + while (Pos >= 0 && Pos < AO.IterationLimit && AO.isHint(AO.Order[Pos])) + ++Pos; + return *this; + } + + bool operator==(const Iterator &Other) const { + assert(&AO == &Other.AO); + return Pos == Other.Pos; + } + + bool operator!=(const Iterator &Other) const { return !(*this == Other); } + }; /// Create a new AllocationOrder for VirtReg. /// @param VirtReg Virtual register to allocate for. @@ -50,34 +90,25 @@ class LLVM_LIBRARY_VISIBILITY AllocationOrder { AllocationOrder(SmallVector &&Hints, ArrayRef Order, bool HardHints) : Hints(std::move(Hints)), Order(Order), - Pos(-static_cast(this->Hints.size())), HardHints(HardHints) {} + IterationLimit(HardHints ? 0 : static_cast(Order.size())) {} - /// Get the allocation order without reordered hints. - ArrayRef getOrder() const { return Order; } - - /// Return the next physical register in the allocation order, or 0. - /// It is safe to call next() again after it returned 0, it will keep - /// returning 0 until rewind() is called. - MCPhysReg next(unsigned Limit = 0) { - if (Pos < 0) - return Hints.end()[Pos++]; - if (HardHints) - return 0; - if (!Limit) - Limit = Order.size(); - while (Pos < int(Limit)) { - unsigned Reg = Order[Pos++]; - if (!isHint(Reg)) - return Reg; - } - return 0; + Iterator begin() const { + return Iterator(*this, -(static_cast(Hints.size()))); } - /// Start over from the beginning. - void rewind() { Pos = -int(Hints.size()); } + Iterator end() const { return Iterator(*this, IterationLimit); } - /// Return true if the last register returned from next() was a preferred register. - bool isHint() const { return Pos <= 0; } + Iterator getOrderLimitEnd(unsigned OrderLimit) const { + assert(OrderLimit <= Order.size()); + if (OrderLimit == 0) + return end(); + Iterator Ret(*this, + std::min(static_cast(OrderLimit) - 1, IterationLimit)); + return ++Ret; + } + + /// Get the allocation order without reordered hints. + ArrayRef getOrder() const { return Order; } /// Return true if PhysReg is a preferred register. 
bool isHint(unsigned PhysReg) const { return is_contained(Hints, PhysReg); } diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 8bbbbeb78236c..83b5a05f92e9b 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -261,7 +261,8 @@ Register RABasic::selectOrSplit(LiveInterval &VirtReg, // Check for an available register in this class. auto Order = AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); - while (Register PhysReg = Order.next()) { + for (MCRegister PhysReg : Order) { + assert(PhysReg.isValid()); // Check for interference in PhysReg switch (Matrix->checkInterference(VirtReg, PhysReg)) { case LiveRegMatrix::IK_Free: diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index c1595391eca10..5b0f9384c04ce 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -757,12 +757,17 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs, const SmallVirtRegSet &FixedRegisters) { - Order.rewind(); Register PhysReg; - while ((PhysReg = Order.next())) - if (!Matrix->checkInterference(VirtReg, PhysReg)) - break; - if (!PhysReg || Order.isHint()) + for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) { + assert(*I); + if (!Matrix->checkInterference(VirtReg, *I)) { + if (I.isHint()) + return *I; + else + PhysReg = *I; + } + } + if (!PhysReg.isValid()) return PhysReg; // PhysReg is available, but there may be a better choice. @@ -803,12 +808,12 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { auto Order = AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); - Register PhysReg; - while ((PhysReg = Order.next())) { - if (PhysReg == PrevReg) + MCRegister PhysReg; + for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) { + if ((*I).id() == PrevReg.id()) continue; - MCRegUnitIterator Units(PhysReg, TRI); + MCRegUnitIterator Units(*I, TRI); for (; Units.isValid(); ++Units) { // Instantiate a "subquery", not to be confused with the Queries array. LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]); @@ -817,7 +822,7 @@ Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { } // If no units have interference, break out with the current PhysReg. if (!Units.isValid()) - break; + PhysReg = *I; } if (PhysReg) LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from " @@ -1134,8 +1139,10 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, } } - Order.rewind(); - while (MCRegister PhysReg = Order.next(OrderLimit)) { + for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; + ++I) { + MCRegister PhysReg = *I; + assert(PhysReg); if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1156,7 +1163,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, BestPhys = PhysReg; // Stop if the hint can be used. 
- if (Order.isHint()) + if (I.isHint()) break; } @@ -1849,8 +1856,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, unsigned &NumCands, bool IgnoreCSR, bool *CanCauseEvictionChain) { unsigned BestCand = NoCand; - Order.rewind(); - while (unsigned PhysReg = Order.next()) { + for (MCPhysReg PhysReg : Order) { + assert(PhysReg); if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg)) continue; @@ -2288,8 +2295,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, (1.0f / MBFI->getEntryFreq()); SmallVector GapWeight; - Order.rewind(); - while (unsigned PhysReg = Order.next()) { + for (MCPhysReg PhysReg : Order) { + assert(PhysReg); // Keep track of the largest spill weight that would need to be evicted in // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1]. calcGapWeights(PhysReg, GapWeight); @@ -2606,8 +2613,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, FixedRegisters.insert(VirtReg.reg()); SmallVector CurrentNewVRegs; - Order.rewind(); - while (Register PhysReg = Order.next()) { + for (MCRegister PhysReg : Order) { + assert(PhysReg.isValid()); LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " << printReg(PhysReg, TRI) << '\n'); RecoloringCandidates.clear(); diff --git a/llvm/unittests/CodeGen/AllocationOrderTest.cpp b/llvm/unittests/CodeGen/AllocationOrderTest.cpp index ba1a1e4f4c00c..d4da8e28ae7f7 100644 --- a/llvm/unittests/CodeGen/AllocationOrderTest.cpp +++ b/llvm/unittests/CodeGen/AllocationOrderTest.cpp @@ -12,11 +12,14 @@ using namespace llvm; namespace { -std::vector loadOrder(AllocationOrder &O, unsigned Limit = 0) { +std::vector loadOrder(const AllocationOrder &O, unsigned Limit = 0) { std::vector Ret; - O.rewind(); - while (auto R = O.next(Limit)) - Ret.push_back(R); + if (Limit == 0) + for (auto R : O) + Ret.push_back(R); + else + for (auto I = O.begin(), E = O.getOrderLimitEnd(Limit); I != E; ++I) + Ret.push_back(*I); return Ret; } } // namespace @@ -48,6 +51,7 @@ TEST(AllocationOrderTest, LimitsBasic) { AllocationOrder O(std::move(Hints), Order, false); EXPECT_EQ((std::vector{1, 2, 3, 4, 5, 6, 7}), loadOrder(O, 0)); EXPECT_EQ((std::vector{1, 2, 3, 4}), loadOrder(O, 1)); + EXPECT_EQ(O.end(), O.getOrderLimitEnd(0)); } TEST(AllocationOrderTest, LimitsDuplicates) { @@ -96,19 +100,19 @@ TEST(AllocationOrderTest, IsHintTest) { SmallVector Hints = {1, 2, 3}; SmallVector Order = {4, 1, 5, 6}; AllocationOrder O(std::move(Hints), Order, false); - O.rewind(); - auto V = O.next(); - EXPECT_TRUE(O.isHint()); + auto I = O.begin(); + auto V = *I; + EXPECT_TRUE(I.isHint()); EXPECT_EQ(V, 1U); - O.next(); - EXPECT_TRUE(O.isHint()); - O.next(); - EXPECT_TRUE(O.isHint()); - V = O.next(); - EXPECT_FALSE(O.isHint()); + ++I; + EXPECT_TRUE(I.isHint()); + ++I; + EXPECT_TRUE(I.isHint()); + V = *(++I); + EXPECT_FALSE(I.isHint()); EXPECT_EQ(V, 4U); - V = O.next(); + V = *(++I); EXPECT_TRUE(O.isHint(1)); - EXPECT_FALSE(O.isHint()); + EXPECT_FALSE(I.isHint()); EXPECT_EQ(V, 5U); } From a48d480e1f7ebc5d5f93507fe1f519496621e259 Mon Sep 17 00:00:00 2001 From: Evandro Menezes Date: Mon, 5 Oct 2020 19:20:59 -0500 Subject: [PATCH 089/321] [RISCV] Fix broken test Fix test for the SiFive E76 core. This patch fixes the issue introduced by the commit 5d6d8a2769. 
--- clang/test/Driver/riscv-cpus.c | 4 ++-- llvm/include/llvm/Support/RISCVTargetParser.def | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/Driver/riscv-cpus.c b/clang/test/Driver/riscv-cpus.c index 2bd0b26f3caf1..d551529f78278 100644 --- a/clang/test/Driver/riscv-cpus.c +++ b/clang/test/Driver/riscv-cpus.c @@ -31,9 +31,9 @@ // mcpu with default march // RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=sifive-e76 | FileCheck -check-prefix=MCPU-SIFIVE-E76 %s // MCPU-SIFIVE-E76: "-nostdsysteminc" "-target-cpu" "sifive-e76" -// MCPU-SIFIVE-E76: "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d" +// MCPU-SIFIVE-E76: "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" // MCPU-SIFIVE-E76: "-target-feature" "+c" -// MCPU-SIFIVE-E76: "-target-abi" "lp64d" +// MCPU-SIFIVE-E76: "-target-abi" "ilp32" // mcpu with mabi option // RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=sifive-u74 -mabi=lp64 | FileCheck -check-prefix=MCPU-ABI-SIFIVE-U74 %s diff --git a/llvm/include/llvm/Support/RISCVTargetParser.def b/llvm/include/llvm/Support/RISCVTargetParser.def index a63874fa5dd02..53eebcf328d33 100644 --- a/llvm/include/llvm/Support/RISCVTargetParser.def +++ b/llvm/include/llvm/Support/RISCVTargetParser.def @@ -7,8 +7,8 @@ PROC(GENERIC_RV32, {"generic-rv32"}, FK_NONE, {""}) PROC(GENERIC_RV64, {"generic-rv64"}, FK_64BIT, {""}) PROC(ROCKET_RV32, {"rocket-rv32"}, FK_NONE, {""}) PROC(ROCKET_RV64, {"rocket-rv64"}, FK_64BIT, {""}) -PROC(BULLET_RV32, {"sifive-7-rv32"}, FK_NONE, {""}) -PROC(BULLET_RV64, {"sifive-7-rv64"}, FK_64BIT, {""}) +PROC(SIFIVE_732, {"sifive-7-rv32"}, FK_NONE, {""}) +PROC(SIFIVE_764, {"sifive-7-rv64"}, FK_64BIT, {""}) PROC(SIFIVE_E31, {"sifive-e31"}, FK_NONE, {"rv32imac"}) PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"}) PROC(SIFIVE_E76, {"sifive-e76"}, FK_NONE, {"rv32imafc"}) From ea9d6392f4a375b5183be02ac93eb3a75c169360 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Tue, 6 Oct 2020 09:31:30 +0900 Subject: [PATCH 090/321] Fix reordering of instructions during VirtRegRewriter unbundling When unbundling COPY bundles in VirtRegRewriter the start of the bundle is not correctly referenced in the unbundling loop. The effect of this is that unbundled instructions are sometimes inserted out-of-order, particular in cases where multiple reordering have been applied to avoid clobbering dependencies. The resulting instruction sequence clobbers dependencies. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D88821 --- llvm/lib/CodeGen/VirtRegMap.cpp | 2 +- llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index 4837cf11bc335..024fedb2eee2a 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -452,7 +452,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { // instruction, the bundle will have been completely undone. 
if (BundledMI != BundleStart) { BundledMI->removeFromBundle(); - MBB.insert(FirstMI, BundledMI); + MBB.insert(BundleStart, BundledMI); } else if (BundledMI->isBundledWithSucc()) { BundledMI->unbundleFromSucc(); BundleStart = &*std::next(BundledMI->getIterator()); diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir index 423eb928b4817..05668eeeef7b5 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -298,11 +298,11 @@ body: | ; VR: SI_SPILL_S512_SAVE killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 64 into %stack.0, align 4, addrspace 5) ; VR: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 ; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 64 from %stack.0, align 4, addrspace 5) - ; VR: renamable $sgpr16 = COPY killed renamable $sgpr20 + ; VR: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr16_sgpr17 ; VR: renamable $sgpr15 = COPY killed renamable $sgpr19 ; VR: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr22_sgpr23 + ; VR: renamable $sgpr16 = COPY killed renamable $sgpr20 ; VR: renamable $sgpr21 = COPY killed renamable $sgpr25 - ; VR: renamable $sgpr12_sgpr13 = COPY renamable $sgpr16_sgpr17 ; VR: renamable $sgpr22 = COPY killed renamable $sgpr26 ; VR: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = IMPLICIT_DEF ; VR: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr12, 0, 0 :: (dereferenceable invariant load 4) From c3e07a0018ec74216d662e0596584d2c846a1063 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Tue, 6 Oct 2020 09:44:50 +0900 Subject: [PATCH 091/321] [AMDGPU] SIInsertSkips: Refactor early exit block creation Refactor exit block creation to a single call ensureEarlyExitBlock. Add support for generating an early exit block which clears the exec mask, but only add this instruction when required. These changes are to facilitate adding more forms of early termination for PS shaders in the near future. 
Reviewed By: nhaehnle Differential Revision: https://reviews.llvm.org/D88775 --- llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 31 +++++++++++++++--------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp index 510d7fd8b8d8b..5bb5165bae523 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -58,12 +58,13 @@ class SIInsertSkips : public MachineFunctionPass { MachineDominatorTree *MDT = nullptr; MachineBasicBlock *EarlyExitBlock = nullptr; + bool EarlyExitClearsExec = false; bool shouldSkip(const MachineBasicBlock &From, const MachineBasicBlock &To) const; bool dominatesAllReachable(MachineBasicBlock &MBB); - void createEarlyExitBlock(MachineBasicBlock &MBB); + void ensureEarlyExitBlock(MachineBasicBlock &MBB, bool ClearExec); void skipIfDead(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL); @@ -180,15 +181,27 @@ static void generatePsEndPgm(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0); } -void SIInsertSkips::createEarlyExitBlock(MachineBasicBlock &MBB) { +void SIInsertSkips::ensureEarlyExitBlock(MachineBasicBlock &MBB, + bool ClearExec) { MachineFunction *MF = MBB.getParent(); DebugLoc DL; - assert(!EarlyExitBlock); - EarlyExitBlock = MF->CreateMachineBasicBlock(); - MF->insert(MF->end(), EarlyExitBlock); + if (!EarlyExitBlock) { + EarlyExitBlock = MF->CreateMachineBasicBlock(); + MF->insert(MF->end(), EarlyExitBlock); + generatePsEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII); + EarlyExitClearsExec = false; + } - generatePsEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII); + if (ClearExec && !EarlyExitClearsExec) { + const GCNSubtarget &ST = MF->getSubtarget(); + unsigned Mov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + Register Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + auto ExitI = EarlyExitBlock->getFirstNonPHI(); + assert(ExitI->getOpcode() == AMDGPU::EXP_DONE); + BuildMI(*EarlyExitBlock, ExitI, DL, TII->get(Mov), Exec).addImm(0); + EarlyExitClearsExec = true; + } } static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, @@ -233,11 +246,7 @@ void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB, if (NoSuccessor) { generatePsEndPgm(MBB, I, DL, TII); } else { - if (!EarlyExitBlock) { - createEarlyExitBlock(MBB); - // Update next block pointer to reflect any new blocks - NextBBI = std::next(MBB.getIterator()); - } + ensureEarlyExitBlock(MBB, false); MachineInstr *BranchMI = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) From 36bb1fb1fe624012332d1f2788625ee4625f29db Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 2 Oct 2020 20:28:49 -0700 Subject: [PATCH 092/321] [MLInliner] Factor out logging Factored out the logging facility, to allow its reuse outside the inliner. 
Differential Revision: https://reviews.llvm.org/D88770 --- llvm/include/llvm/Analysis/Utils/TFUtils.h | 58 ++++++ .../Analysis/DevelopmentModeInlineAdvisor.cpp | 180 +++++------------- llvm/lib/Analysis/TFUtils.cpp | 99 ++++++++++ .../Inline/ML/development-training-log.ll | 14 +- llvm/unittests/Analysis/TFUtilsTest.cpp | 86 +++++++++ 5 files changed, 296 insertions(+), 141 deletions(-) diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h index bba275b2524fd..522dcff76d502 100644 --- a/llvm/include/llvm/Analysis/Utils/TFUtils.h +++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -100,6 +100,64 @@ class TensorSpec final { Optional getTensorSpecFromJSON(LLVMContext &Ctx, const json::Value &Value); +/// Logging utility - given an ordered specification of features, and assuming +/// a scalar reward, allow logging feature values and rewards, and then print +/// as tf.train.SequenceExample text protobuf. +/// The assumption is that, for an event to be logged (i.e. a set of feature +/// values and a reward), the user calls the log* API for each feature exactly +/// once, providing the index matching the position in the feature spec list +/// provided at construction: +/// event 0: +/// logTensorValue(0, ...) +/// logTensorValue(1, ...) +/// ... +/// logReward(...) +/// event 1: +/// logTensorValue(0, ...) +/// logTensorValue(1, ...) +/// ... +/// logReward(...) +/// +/// At the end, call print to generate the protobuf. +class Logger final { +public: + struct LoggedFeatureSpec { + TensorSpec Spec; + Optional LoggingName; + }; + + /// Construct a Logger. If IncludeReward is false, then logReward shouldn't + /// be called, and the reward feature won't be printed out. + Logger(const std::vector &FeatureSpecs, + const TensorSpec &RewardSpec, bool IncludeReward) + : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec), + RawLogData(FeatureSpecs.size() + IncludeReward), + IncludeReward(IncludeReward) {} + + template void logReward(T Value) { + assert(IncludeReward); + logTensorValue(RawLogData.size() - 1, &Value); + } + + template + void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) { + const char *Start = reinterpret_cast(Value); + const char *End = Start + sizeof(T) * Size; + RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End); + } + + void print(raw_ostream &OS); + +private: + std::vector FeatureSpecs; + TensorSpec RewardSpec; + /// RawData has one entry per feature, plus one more for the reward. + /// Each feature's values are then stored in a vector, in succession. + /// This means the ith event is stored at [*][i] + std::vector> RawLogData; + const bool IncludeReward; +}; + class TFModelEvaluator final { public: /// The result of a model evaluation. Handles the lifetime of the output diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp index 6f8205e19230e..d247f574455b4 100644 --- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp +++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp @@ -74,11 +74,11 @@ namespace { /// An InlineEvent, used by TrainingLogger. struct InlineEvent { /// What the default policy's decision would have been. - bool DefaultDecision = false; + int64_t DefaultDecision = 0; /// What we advised. When training off the default policy, this is the same as /// DefaultDecision. - bool AdvisedDecision = false; + int64_t AdvisedDecision = 0; /// What actually happened. 
This would be 'false' in the case of an inline /// error, even if AdvisedDecision were true, otherwise it agrees with @@ -109,91 +109,16 @@ class TrainingLogger final { void print(); private: - /// Write the values of one tensor as a list. - template - void writeTensorValues(raw_fd_ostream &OutFile, const char *TensorData, - size_t ElemCount) const { - OutFile << "["; - const T *TypedData = reinterpret_cast(TensorData); - for (size_t I = 0; I < ElemCount; ++I) { - if (I > 0) - OutFile << ", "; - OutFile << TypedData[I]; - } - OutFile << "]"; - } - - /// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs. - /// The tensors are assumed to be stored contiguously, in row-major format, - /// in the TensorData buffer. Each tensor has the shape given by Spec. The - /// feature name in the output is either the provided LoggingName, if - /// specified, otherwise it's the name of the tensor (as given by Spec). - template - void - writeTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec, - const T *TensorData, size_t TensorCount, - Optional LoggingName = None) const { - writeRawTensorsAsFeatureLists(OutFile, Spec, - reinterpret_cast(TensorData), - TensorCount, LoggingName); - } - - /// Untyped implementation of the API above. - void - writeRawTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec, - const char *TensorData, size_t TensorCount, - Optional LoggingName = None) const { - const char *FieldName = ""; - std::function ValueWriter; - // The 'Feature' protobuf only has 3 possible fields: float_list, - // int64_list, or bytes_list, so we capture int32 values as int64. We don't - // support any other types. - if (Spec.isElementType()) { - FieldName = "int64_list"; - ValueWriter = [&](const char *Data) { - writeTensorValues(OutFile, Data, Spec.getElementCount()); - }; - } else if (Spec.isElementType()) { - FieldName = "int64_list"; - ValueWriter = [&](const char *Data) { - writeTensorValues(OutFile, Data, Spec.getElementCount()); - }; - - } else if (Spec.isElementType()) { - FieldName = "float_list"; - ValueWriter = [&](const char *Data) { - writeTensorValues(OutFile, Data, Spec.getElementCount()); - }; - - } else - llvm_unreachable("Unsupported tensor type."); - - OutFile << " feature_list: {\n"; - OutFile << " key: " - << "\"" << (LoggingName ? *LoggingName : Spec.name()) << "\" "; - OutFile << "value: {\n"; - size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize(); - for (const char *P = TensorData, - *E = TensorData + TensorByteSize * TensorCount; - P < E; P += TensorByteSize) { - OutFile << " feature: { " << FieldName << ": { value: "; - ValueWriter(P); - OutFile << " } }\n"; - } - OutFile << " }\n"; - OutFile << " }\n"; - } - StringRef LogFileName; const ModelUnderTrainingRunner *const MUTR; - std::vector Features; - std::vector DefaultDecisions; - // We store all outputs as data blobs, but we always expect to have one, the - // first one, representing the decision. While we could track that separately, - // for uniformity, we store it, generically, here. - std::vector> Outputs; + std::unique_ptr L; std::vector Effects; - std::vector Rewards; + /// There's at least one output. We'll set this to a different value if MUTR + /// is available. + size_t OutputCount = 1; + /// Set these 2 clearly OOB, to make sure we set them later.
+ size_t DefaultDecisionPos = std::numeric_limits::max(); + size_t DecisionPos = std::numeric_limits::max(); }; /// An extension of the MLInlineAdvisor for the 'development' mode, targeting @@ -331,8 +256,8 @@ class LoggingMLInlineAdvice : public MLInlineAdvice { TrainingLogger &Logger; const Optional CallerSizeEstimateBefore; const Optional CalleeSizeEstimateBefore; - const bool DefaultDecision; - const bool Mandatory; + const int64_t DefaultDecision; + const int64_t Mandatory; }; /// A pseudo model runner. We use it to store feature values when collecting @@ -402,69 +327,62 @@ class ModelUnderTrainingRunner final : public MLModelRunner { TrainingLogger::TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR) : LogFileName(LogFileName), MUTR(MUTR) { + // The first output is the inlining decision. + if (MUTR) + OutputCount = MUTR->outputSpecs().size(); + std::vector FT; + for (size_t I = 0; I < NumberOfFeatures; ++I) - Features.push_back(InlineFeatures()); + FT.push_back( + {TensorSpec::createSpec(FeatureNameMap.at(I), {1}), None}); + for (size_t I = 1; I < OutputCount; ++I) + FT.push_back({MUTR->outputSpecs()[I], MUTR->outputNames()[I]}); - // The first output is the inlining decision. - auto OutputCount = MUTR ? MUTR->outputSpecs().size() : 1; - Outputs.assign(OutputCount, std::vector()); + DefaultDecisionPos = FT.size(); + FT.push_back( + {TensorSpec::createSpec(DefaultDecisionName, {1}), None}); + + DecisionPos = FT.size(); + FT.push_back({TensorSpec::createSpec(DecisionName, {1}), None}); + + L = std::make_unique( + FT, TensorSpec::createSpec(RewardName, {1}), + InlineSizeEstimatorAnalysis::isEvaluatorRequested()); } /// Log one inlining event. void TrainingLogger::logInlineEvent(const InlineEvent &Event, const MLModelRunner &ModelRunner) { - for (size_t I = 0; I < NumberOfFeatures; ++I) - Features[I].push_back(ModelRunner.getFeature(I)); + size_t CurrentFeature = 0; + for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) { + int64_t F = ModelRunner.getFeature(CurrentFeature); + L->logTensorValue(CurrentFeature, &F); + } - Effects.push_back(Event.Effect); - Rewards.push_back(Event.Reward); - DefaultDecisions.push_back(Event.DefaultDecision); - int64_t Advice = static_cast(Event.AdvisedDecision); - const char *AdviceData = reinterpret_cast(&Advice); - Outputs[0].insert(Outputs[0].end(), AdviceData, AdviceData + sizeof(int64_t)); - for (size_t I = 1; I < Outputs.size(); ++I) { + for (size_t I = 1; I < OutputCount; ++I) { const auto &Result = *MUTR->lastEvaluationResult(); auto &Spec = MUTR->outputSpecs()[I]; const char *RawData = reinterpret_cast(Result.getUntypedTensorValue(I)); - Outputs[I].insert(Outputs[I].end(), RawData, - RawData + - Spec.getElementCount() * Spec.getElementByteSize()); + L->logTensorValue(CurrentFeature, RawData, + Spec.getElementCount() * Spec.getElementByteSize()); + ++CurrentFeature; } + + assert(CurrentFeature == DefaultDecisionPos); + L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision); + L->logTensorValue(DecisionPos, &Event.AdvisedDecision); + if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) + L->logReward(Event.Reward); + + // For debugging / later use + Effects.push_back(Event.Effect); } void TrainingLogger::print() { std::error_code EC; raw_fd_ostream OutFile(LogFileName, EC); - size_t NumberOfRecords = Rewards.size(); - if (NumberOfRecords == 0) - return; - - OutFile << "feature_lists: {\n"; - for (size_t I = 0; I < Features.size(); ++I) - writeTensorsAsFeatureLists( - OutFile, 
TensorSpec::createSpec(FeatureNameMap.at(I), {1}), - Features[I].data(), NumberOfRecords); - - writeTensorsAsFeatureLists( - OutFile, TensorSpec::createSpec(DefaultDecisionName, {1}), - DefaultDecisions.data(), NumberOfRecords); - - writeRawTensorsAsFeatureLists( - OutFile, TensorSpec::createSpec(DecisionName, {1}), - Outputs[0].data(), NumberOfRecords); - - if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) - writeTensorsAsFeatureLists(OutFile, - TensorSpec::createSpec(RewardName, {1}), - Rewards.data(), NumberOfRecords); - - for (size_t I = 1; I < Outputs.size(); ++I) - writeRawTensorsAsFeatureLists(OutFile, MUTR->outputSpecs()[I], - Outputs[I].data(), NumberOfRecords, - StringRef(MUTR->outputNames()[I])); - - OutFile << "}\n"; + L->print(OutFile); } DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp index 425f55c1ce549..1357cd9ef549d 100644 --- a/llvm/lib/Analysis/TFUtils.cpp +++ b/llvm/lib/Analysis/TFUtils.cpp @@ -62,6 +62,82 @@ TFStatusPtr createTFStatus() { TFSessionOptionsPtr createTFSessionOptions() { return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions); } + +/// Write the values of one tensor as a list. +template +void writeTensorValues(raw_ostream &OutFile, const char *TensorData, + size_t ElemCount) { + OutFile << "["; + const T *TypedData = reinterpret_cast(TensorData); + for (size_t I = 0; I < ElemCount; ++I) { + if (I > 0) + OutFile << ", "; + OutFile << TypedData[I]; + } + OutFile << "]"; +} + +/// Untyped implementation of the API above. +void writeRawTensorsAsFeatureLists(raw_ostream &OutFile, + const Logger::LoggedFeatureSpec &LoggedSpec, + const char *TensorData, size_t TensorCount) { + const char *FieldName = ""; + std::function ValueWriter; + const auto &Spec = LoggedSpec.Spec; + // The 'Feature' protobuf only has 3 possible fields: float_list, + // int64_list, or bytes_list, so we capture int32 values as int64. We don't + // support any other types. + if (Spec.isElementType()) { + FieldName = "int64_list"; + ValueWriter = [&](const char *Data) { + writeTensorValues(OutFile, Data, Spec.getElementCount()); + }; + } else if (Spec.isElementType()) { + FieldName = "int64_list"; + ValueWriter = [&](const char *Data) { + writeTensorValues(OutFile, Data, Spec.getElementCount()); + }; + + } else if (Spec.isElementType()) { + FieldName = "float_list"; + ValueWriter = [&](const char *Data) { + writeTensorValues(OutFile, Data, Spec.getElementCount()); + }; + + } else { + llvm_unreachable("Unsupported tensor type."); + } + + OutFile << " feature_list: {\n"; + OutFile << " key: " + << "\"" + << (LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name()) + << "\" "; + OutFile << "value: {\n"; + size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize(); + for (const char *P = TensorData, + *E = TensorData + TensorByteSize * TensorCount; + P < E; P += TensorByteSize) { + OutFile << " feature: { " << FieldName << ": { value: "; + ValueWriter(P); + OutFile << " } }\n"; + } + OutFile << " }\n"; + OutFile << " }\n"; +} + +/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs. +/// The tensors are assumed to be stored contiguously, in row-major format, +/// in the TensorData buffer. Each tensor has the shape given by Spec. The +/// feature name in the output is either the provided LoggingName, if +/// specified, otherwise it's the name of the tensor (as given by Spec). 
+template +void writeTensorsAsFeatureLists(raw_ostream &OutFile, + const Logger::LoggedFeatureSpec &Spec, + const T *TensorData, size_t TensorCount) { + writeRawTensorsAsFeatureLists( + OutFile, Spec, reinterpret_cast(TensorData), TensorCount); +} } // namespace namespace llvm { @@ -318,4 +394,27 @@ TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL) TFModelEvaluator::EvaluationResult::~EvaluationResult() {} TFModelEvaluator::~TFModelEvaluator() {} + +void Logger::print(raw_ostream &OS) { + if (RawLogData.empty()) + return; + if (RawLogData[0].empty()) + return; + size_t Tensor0Size = FeatureSpecs[0].Spec.getElementCount() * + FeatureSpecs[0].Spec.getElementByteSize(); + size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size; + if (NumberOfRecords == 0) + return; + + OS << "feature_lists: {\n"; + for (size_t I = 0; I < FeatureSpecs.size(); ++I) + writeTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(), + NumberOfRecords); + + if (IncludeReward) + writeTensorsAsFeatureLists(OS, {RewardSpec, None}, RawLogData.back().data(), + NumberOfRecords); + + OS << "}\n"; +} #endif // defined(LLVM_HAVE_TF_API) diff --git a/llvm/test/Transforms/Inline/ML/development-training-log.ll b/llvm/test/Transforms/Inline/ML/development-training-log.ll index 82dea452497dd..0dcff29a343d5 100644 --- a/llvm/test/Transforms/Inline/ML/development-training-log.ll +++ b/llvm/test/Transforms/Inline/ML/development-training-log.ll @@ -42,19 +42,13 @@ define dso_local i32 @top() { !1 = !{!"clang version 7.0.0-6 (tags/RELEASE_700/final)"} ; Check we produce a protobuf that has inlining decisions and rewards. -; CHECK: feature_lists: { +; CHECK-NOT: fake_extra_output +; EXTRA-OUTPUTS: key: "fake_extra_output" value: { +; EXTRA-OUTPUTS-NEXT: feature: { int64_list: { value: [1] } } ; CHECK: key: "inlining_decision" value: { ; CHECK-NEXT: feature: { int64_list: { value: [1] } } -; CHECK-NEXT: } -; CHECK-NEXT: } -; CHECK-NEXT: feature_list: { -; CHECK-NEXT: key: "delta_size" value: { +; CHECK: key: "delta_size" value: { ; CHECK-NEXT: feature: { int64_list: { value: [0] } } ; CHECK-NEXT: } ; CHECK-NEXT: } ; NOREWARD-NOT: key: "delta_size" value: { -; CHECK-NOT: fake_extra_output -; EXTRA-OUTPUTS: key: "fake_extra_output" value: { -; EXTRA-OUTPUTS-NEXT: feature: { int64_list: { value: [1] } } -; EXTRA-OUTPUTS-NEXT: } -; EXTRA-OUTPUTS-NEXT: } \ No newline at end of file diff --git a/llvm/unittests/Analysis/TFUtilsTest.cpp b/llvm/unittests/Analysis/TFUtilsTest.cpp index 19ca1f21c1613..3b62e33999d9c 100644 --- a/llvm/unittests/Analysis/TFUtilsTest.cpp +++ b/llvm/unittests/Analysis/TFUtilsTest.cpp @@ -142,3 +142,89 @@ TEST(TFUtilsTest, TensorSpecSizesAndTypes) { EXPECT_EQ(Spec3DLarge.getElementByteSize(), sizeof(float)); EXPECT_EQ(Spec1D.getElementByteSize(), sizeof(int16_t)); } + +TEST(TFUtilsTest, Logger) { + std::vector Features; + Features.push_back( + {TensorSpec::createSpec("the_float", {2, 3}), None}); + Features.push_back({TensorSpec::createSpec("the_int", {2}), + std::string("alternate_name")}); + + auto Rewards = TensorSpec::createSpec("reward", {1}); + Logger L(Features, Rewards, true); + float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5}; + int64_t F01[]{2, 3}; + + L.logTensorValue(0, F00, 6); + L.logTensorValue(1, F01, 2); + L.logReward(3.4); + float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; + int64_t F11[]{-2, -3}; + L.logTensorValue(0, F10, 6); + L.logTensorValue(1, F11, 2); + L.logReward(-3.0); + const auto *Expected = R"(feature_lists: { + feature_list: { + key: "the_float" value: { + feature: { float_list: { value: 
[0.000000e+00, 1.000000e-01, 2.000000e-01, 3.000000e-01, 4.000000e-01, 5.000000e-01] } } + feature: { float_list: { value: [0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00] } } + } + } + feature_list: { + key: "alternate_name" value: { + feature: { int64_list: { value: [2, 3] } } + feature: { int64_list: { value: [-2, -3] } } + } + } + feature_list: { + key: "reward" value: { + feature: { float_list: { value: [3.400000e+00] } } + feature: { float_list: { value: [-3.000000e+00] } } + } + } +} +)"; + std::string Result; + raw_string_ostream OS(Result); + L.print(OS); + EXPECT_EQ(Result, Expected); +} + +TEST(TFUtilsTest, LoggerNoReward) { + std::vector Features; + Features.push_back( + {TensorSpec::createSpec("the_float", {2, 3}), None}); + Features.push_back({TensorSpec::createSpec("the_int", {2}), + std::string("alternate_name")}); + + auto Rewards = TensorSpec::createSpec("reward", {1}); + Logger L(Features, Rewards, false); + float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5}; + int64_t F01[]{2, 3}; + + L.logTensorValue(0, F00, 6); + L.logTensorValue(1, F01, 2); + float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; + int64_t F11[]{-2, -3}; + L.logTensorValue(0, F10, 6); + L.logTensorValue(1, F11, 2); + const auto *Expected = R"(feature_lists: { + feature_list: { + key: "the_float" value: { + feature: { float_list: { value: [0.000000e+00, 1.000000e-01, 2.000000e-01, 3.000000e-01, 4.000000e-01, 5.000000e-01] } } + feature: { float_list: { value: [0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00] } } + } + } + feature_list: { + key: "alternate_name" value: { + feature: { int64_list: { value: [2, 3] } } + feature: { int64_list: { value: [-2, -3] } } + } + } +} +)"; + std::string Result; + raw_string_ostream OS(Result); + L.print(OS); + EXPECT_EQ(Result, Expected); +} \ No newline at end of file From d4e08c95e500bc813b24b146be29f17e7c08fa2a Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 17 Sep 2020 16:01:30 -0700 Subject: [PATCH 093/321] [NewPM] Set -enable-npm-optnone to true by default This makes the NPM skip not required passes on functions marked optnone. If this causes a pass that should be required but has not been marked required to be skipped, add `static bool isRequired() { return true; }` to the pass class. AlwaysInlinerPass is an example. clang/test/CodeGen/O0-no-skipped-passes.c is useful for checking that no passes are skipped under -O0. The -enable-npm-optnone option will be removed once this has been stable for long enough without issues. Reviewed By: ychen, asbirlea Differential Revision: https://reviews.llvm.org/D87869 --- llvm/lib/Passes/StandardInstrumentations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index d2ef2cd4ed61e..06aa3868fd6d4 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -33,7 +33,7 @@ using namespace llvm; // TODO: remove once all required passes are marked as such. static cl::opt - EnableOptnone("enable-npm-optnone", cl::init(false), + EnableOptnone("enable-npm-optnone", cl::init(true), cl::desc("Enable skipping optional passes optnone functions " "under new pass manager")); From ebf6fd633ef83b747bac721aafc3677d2c47aef1 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 5 Oct 2020 18:39:33 -0700 Subject: [PATCH 094/321] Make OpenMP tests less brittle in the face of changes in constant evaluation diagnostics. 
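The pattern adopted throughout is to stop pinning exact counts of constant-evaluation notes, which shift between C++ dialects and as the diagnostics evolve, and instead use the -verify occurrence prefix `0+{{...}}` ("zero or more") plus a file-level `expected-note@* 0+{{declared here}}` that tolerates 'declared here' notes anywhere. A minimal hypothetical test in that style (editorial illustration, not part of the commit):

  // expected-note@* 0+{{declared here}}
  bool foobool(int argc) { return argc; }
  void test(char **argv) {
    // The error is stable; the number of 'constant expression' notes is not,
    // so the notes are matched with a 0+ count.
    // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
  #pragma omp simd safelen(foobool(1) > 0 ? 1 : 2)
    for (int i = 0; i < 8; i++)
      argv[0][i] = argv[0][i] - argv[0][i - 1];
  }

This keeps the expectations valid whether or not constant evaluation emits explanatory notes.
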
--- .../OpenMP/distribute_collapse_messages.cpp | 38 ++++++--------- ...tribute_parallel_for_collapse_messages.cpp | 39 ++++++---------- ...te_parallel_for_simd_collapse_messages.cpp | 39 ++++++---------- ...ute_parallel_for_simd_safelen_messages.cpp | 33 +++++-------- ...ute_parallel_for_simd_simdlen_messages.cpp | 36 +++++---------- .../distribute_simd_collapse_messages.cpp | 39 ++++++---------- .../distribute_simd_safelen_messages.cpp | 33 +++++-------- .../distribute_simd_simdlen_messages.cpp | 33 +++++-------- clang/test/OpenMP/for_collapse_messages.cpp | 39 ++++++---------- clang/test/OpenMP/for_ordered_clause.cpp | 39 ++++++---------- .../OpenMP/for_simd_collapse_messages.cpp | 39 ++++++---------- .../test/OpenMP/for_simd_safelen_messages.cpp | 39 ++++++---------- .../test/OpenMP/for_simd_simdlen_messages.cpp | 39 ++++++---------- .../master_taskloop_collapse_messages.cpp | 39 ++++++---------- ...master_taskloop_simd_collapse_messages.cpp | 39 ++++++---------- .../master_taskloop_simd_safelen_messages.cpp | 39 ++++++---------- .../master_taskloop_simd_simdlen_messages.cpp | 39 ++++++---------- .../OpenMP/parallel_for_collapse_messages.cpp | 39 ++++++---------- .../OpenMP/parallel_for_ordered_messages.cpp | 39 ++++++---------- .../parallel_for_simd_collapse_messages.cpp | 39 ++++++---------- .../parallel_for_simd_safelen_messages.cpp | 39 ++++++---------- .../parallel_for_simd_simdlen_messages.cpp | 39 ++++++---------- ...llel_master_taskloop_collapse_messages.cpp | 39 ++++++---------- ...master_taskloop_simd_collapse_messages.cpp | 39 ++++++---------- ..._master_taskloop_simd_safelen_messages.cpp | 39 ++++++---------- ..._master_taskloop_simd_simdlen_messages.cpp | 39 ++++++---------- clang/test/OpenMP/simd_collapse_messages.cpp | 39 ++++++---------- clang/test/OpenMP/simd_safelen_messages.cpp | 39 ++++++---------- clang/test/OpenMP/simd_simdlen_messages.cpp | 39 ++++++---------- .../target_parallel_for_collapse_messages.cpp | 39 +++++++--------- .../target_parallel_for_ordered_messages.cpp | 39 +++++++--------- ...et_parallel_for_simd_collapse_messages.cpp | 44 ++++++++---------- ...get_parallel_for_simd_ordered_messages.cpp | 33 ++++++------- ...get_parallel_for_simd_safelen_messages.cpp | 39 ++++++---------- ...get_parallel_for_simd_simdlen_messages.cpp | 39 ++++++---------- .../OpenMP/target_simd_collapse_messages.cpp | 46 ++++++++----------- .../OpenMP/target_simd_safelen_messages.cpp | 39 ++++++---------- .../OpenMP/target_simd_simdlen_messages.cpp | 39 ++++++---------- ...get_teams_distribute_collapse_messages.cpp | 35 +++++--------- ...tribute_parallel_for_collapse_messages.cpp | 35 +++++--------- ...te_parallel_for_simd_collapse_messages.cpp | 35 +++++--------- ...ute_parallel_for_simd_safelen_messages.cpp | 33 +++++-------- ...ute_parallel_for_simd_simdlen_messages.cpp | 32 +++++-------- ...eams_distribute_simd_collapse_messages.cpp | 43 +++++++---------- ...teams_distribute_simd_safelen_messages.cpp | 33 +++++-------- ...teams_distribute_simd_simdlen_messages.cpp | 33 +++++-------- .../OpenMP/taskloop_collapse_messages.cpp | 39 ++++++---------- .../taskloop_simd_collapse_messages.cpp | 39 ++++++---------- .../OpenMP/taskloop_simd_safelen_messages.cpp | 39 ++++++---------- .../OpenMP/taskloop_simd_simdlen_messages.cpp | 39 ++++++---------- .../teams_distribute_collapse_messages.cpp | 35 +++++--------- ...tribute_parallel_for_collapse_messages.cpp | 35 +++++--------- ...te_parallel_for_simd_collapse_messages.cpp | 35 +++++--------- 
...ute_parallel_for_simd_safelen_messages.cpp | 33 +++++-------- ...ute_parallel_for_simd_simdlen_messages.cpp | 33 +++++-------- ...eams_distribute_simd_collapse_messages.cpp | 43 +++++++---------- ...teams_distribute_simd_safelen_messages.cpp | 33 +++++-------- ...teams_distribute_simd_simdlen_messages.cpp | 33 +++++-------- 58 files changed, 794 insertions(+), 1387 deletions(-) diff --git a/clang/test/OpenMP/distribute_collapse_messages.cpp b/clang/test/OpenMP/distribute_collapse_messages.cpp index 6a0c8c31cd97f..c14b7da715497 100644 --- a/clang/test/OpenMP/distribute_collapse_messages.cpp +++ b/clang/test/OpenMP/distribute_collapse_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L - // expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp distribute collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp distribute collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp distribute collapse () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp distribute collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}} @@ -38,18 +36,16 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp distribute collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp distribute', but found only 1}} -#if __cplusplus >= 201103L - // expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp distribute' cannot contain more than one 'collapse' clause}} // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+1 2 {{integral constant expression}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp distribute collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp distribute collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -75,15 +71,11 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp distribute', but found only 1}} #pragma omp distribute collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp distribute' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp distribute', but found only 1}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif - #pragma omp distribute collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} + + #pragma omp distribute collapse (foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+5 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif - // expected-error@+3 {{integral constant expression}} + + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp distribute' cannot contain more than one 'collapse' clause}} // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp distribute collapse (foobool(argc)), collapse (true), collapse (-5) @@ -91,7 +83,7 @@ int main(int argc, char **argv) { #pragma omp distribute collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/distribute_parallel_for_collapse_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_collapse_messages.cpp index 3883575e68bc3..2d9e4fcf532b5 100644 --- a/clang/test/OpenMP/distribute_parallel_for_collapse_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_collapse_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams #pragma omp distribute parallel for collapse // expected-error {{expected '(' after 'collapse'}} @@ -34,9 +33,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target #pragma omp teams - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp distribute parallel for collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target @@ -52,12 +50,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp teams #pragma omp distribute parallel for collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp distribute parallel for', but found only 1}} - // expected-error@+8 2 {{directive '#pragma omp distribute parallel for' cannot contain more than one 'collapse' clause}} - // expected-error@+7 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+6 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+5 2 {{directive '#pragma omp distribute parallel for' cannot contain more than one 'collapse' clause}} + // expected-error@+4 {{argument to 'collapse' clause must be a strictly positive integer value}} + // expected-error@+3 2 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for collapse (foobool(argc)), collapse (true), collapse (-5) @@ -67,7 +62,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp distribute parallel for collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+6 2 {{integral constant expression}} + // expected-error@+6 2 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -111,18 +106,12 @@ int main(int argc, char **argv) { #pragma omp teams #pragma omp distribute parallel for collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp distribute parallel for', but found only 1}} - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for collapse (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+8 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+6{{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+5 {{integral constant expression}} expected-note@+5 0+{{constant expression}} // expected-error@+4 2 {{directive '#pragma omp distribute parallel for' cannot contain more than one 'collapse' clause}} // expected-error@+3 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp target @@ -134,7 +123,7 @@ int main(int argc, char **argv) { #pragma omp distribute parallel for collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+6 {{integral constant expression}} + // expected-error@+6 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_collapse_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_collapse_messages.cpp index ff9a728ee2dec..d58c6f2831a9d 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_collapse_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_collapse_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd collapse // expected-error {{expected '(' after 'collapse'}} @@ -34,9 +33,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target #pragma omp teams - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp distribute parallel for simd collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target @@ -52,12 +50,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp teams #pragma omp distribute parallel for simd collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp distribute parallel for simd', but found only 1}} - // expected-error@+8 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'collapse' clause}} - // expected-error@+7 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+6 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+5 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'collapse' clause}} + // expected-error@+4 {{argument to 'collapse' clause must be a strictly positive integer value}} + // expected-error@+3 2 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd collapse (foobool(argc)), collapse (true), collapse (-5) @@ -67,7 +62,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp distribute parallel for simd collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+6 2 {{integral constant expression}} + // expected-error@+6 2 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -111,18 +106,12 @@ int main(int argc, char **argv) { #pragma omp teams #pragma omp distribute parallel for simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp distribute parallel for simd', but found only 1}} - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd collapse (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+8 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+6{{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+5 {{integral constant expression}} expected-note@+5 0+{{constant expression}} // expected-error@+4 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'collapse' clause}} // expected-error@+3 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp target @@ -134,7 +123,7 @@ int main(int argc, char **argv) { #pragma omp distribute parallel for simd collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+6 {{integral constant expression}} + // expected-error@+6 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_safelen_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_safelen_messages.cpp index 2d659efadbbdc..eba65f733fcf9 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_safelen_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_safelen_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams @@ -40,7 +39,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}} +#pragma omp distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -62,14 +61,11 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -#if __cplusplus >= 201103L - // expected-note@+7 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target #pragma omp teams // expected-error@+3 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'safelen' clause}} // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} -// expected-error@+1 2 {{integral constant expression}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} 
#pragma omp distribute parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -81,7 +77,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+6 2 {{integral constant expression}} + // expected-error@+6 2 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -137,23 +133,18 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + #pragma omp target #pragma omp teams -#pragma omp parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+7 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target #pragma omp teams // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}} // expected-error@+2 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'safelen' clause}} -// expected-error@+1 {{integral constant expression}} +// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; @@ -165,7 +156,7 @@ int main(int argc, char **argv) { argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+6 {{integral constant expression}} + // expected-error@+6 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_simdlen_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_simdlen_messages.cpp index b4f40e8e5c231..e035f16b60b45 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_simdlen_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_simdlen_messages.cpp @@ -5,20 +5,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd simdlen // expected-error {{expected '(' after 'simdlen'}} @@ -37,9 +36,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -// expected-error@+5 {{expected ')'}} expected-note@+5 
{{to match this '('}} -// expected-error@+4 2 {{integral constant expression}} -// expected-note@+3 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} +// expected-error@+4 {{expected ')'}} expected-note@+4 {{to match this '('}} +// expected-error@+3 2 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd simdlen (argc @@ -65,14 +63,11 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -#if __cplusplus >= 201103L - // expected-note@+7 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target #pragma omp teams // expected-error@+3 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'simdlen' clause}} // expected-error@+2 {{argument to 'simdlen' clause must be a strictly positive integer value}} -// expected-error@+1 2 {{integral constant expression}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp distribute parallel for simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -84,7 +79,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+6 2 {{integral constant expression}} + // expected-error@+6 2 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -140,22 +135,15 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd simdlen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp distribute parallel for simd simdlen (foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - -#if __cplusplus >= 201103L - // expected-note@+7 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target #pragma omp teams -// expected-error@+3 {{integral constant expression}} +// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'simdlen' clause}} // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}} #pragma omp distribute parallel for simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) @@ -169,7 +157,7 @@ int main(int argc, char **argv) { argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+6 {{integral constant expression}} + // expected-error@+6 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/distribute_simd_collapse_messages.cpp b/clang/test/OpenMP/distribute_simd_collapse_messages.cpp index 418094bc1a7b0..98b02ed814fd4 100644 --- a/clang/test/OpenMP/distribute_simd_collapse_messages.cpp +++ b/clang/test/OpenMP/distribute_simd_collapse_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams #pragma omp distribute simd collapse // expected-error {{expected '(' after 'collapse'}} @@ -34,9 +33,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target #pragma omp teams - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp distribute simd collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target @@ -52,12 +50,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp teams #pragma omp distribute simd collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp distribute simd', but found only 1}} - // expected-error@+8 2 {{directive '#pragma omp distribute simd' cannot contain more than one 'collapse' clause}} - // expected-error@+7 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+6 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+5 2 {{directive '#pragma omp distribute simd' cannot contain more than one 'collapse' clause}} + // expected-error@+4 {{argument to 'collapse' clause must be a strictly positive integer value}} + // expected-error@+3 2 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #pragma omp target #pragma omp teams #pragma omp distribute simd collapse (foobool(argc)), collapse (true), collapse (-5) @@ -67,7 +62,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp distribute simd collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+6 2 {{integral constant expression}} + // expected-error@+6 2 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -111,18 +106,12 @@ int main(int argc, char **argv) { #pragma omp teams #pragma omp distribute simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp distribute simd' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp distribute simd', but found only 1}} - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #pragma omp target #pragma omp teams #pragma omp distribute simd collapse (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+8 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+6{{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+5 {{integral constant expression}} expected-note@+5 0+{{constant expression}} // expected-error@+4 2 {{directive '#pragma omp distribute simd' cannot contain more than one 'collapse' clause}} // expected-error@+3 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp target @@ -134,7 +123,7 @@ int main(int argc, char **argv) { #pragma omp distribute simd collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+6 {{integral constant expression}} + // expected-error@+6 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/distribute_simd_safelen_messages.cpp b/clang/test/OpenMP/distribute_simd_safelen_messages.cpp index bffc8ec85469c..51d6778294862 100644 --- a/clang/test/OpenMP/distribute_simd_safelen_messages.cpp +++ b/clang/test/OpenMP/distribute_simd_safelen_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams @@ -40,7 +39,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp target #pragma omp teams -#pragma omp distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}} +#pragma omp distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -62,14 +61,11 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -#if __cplusplus >= 201103L - // expected-note@+7 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target #pragma omp teams // expected-error@+3 2 {{directive '#pragma omp distribute simd' cannot contain more than one 'safelen' clause}} // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} -// expected-error@+1 2 {{integral constant expression}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp distribute simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = 
argv[0][i] - argv[0][i-ST]; @@ -81,7 +77,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+6 2 {{integral constant expression}} + // expected-error@+6 2 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -137,23 +133,18 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + #pragma omp target #pragma omp teams -#pragma omp parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+7 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target #pragma omp teams // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}} // expected-error@+2 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'safelen' clause}} -// expected-error@+1 {{integral constant expression}} +// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; @@ -165,7 +156,7 @@ int main(int argc, char **argv) { argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+6 {{integral constant expression}} + // expected-error@+6 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/distribute_simd_simdlen_messages.cpp b/clang/test/OpenMP/distribute_simd_simdlen_messages.cpp index bffc8ec85469c..51d6778294862 100644 --- a/clang/test/OpenMP/distribute_simd_simdlen_messages.cpp +++ b/clang/test/OpenMP/distribute_simd_simdlen_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams @@ -40,7 +39,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp target #pragma omp teams -#pragma omp distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}} +#pragma omp distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral 
constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -62,14 +61,11 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -#if __cplusplus >= 201103L - // expected-note@+7 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target #pragma omp teams // expected-error@+3 2 {{directive '#pragma omp distribute simd' cannot contain more than one 'safelen' clause}} // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} -// expected-error@+1 2 {{integral constant expression}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp distribute simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -81,7 +77,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+6 2 {{integral constant expression}} + // expected-error@+6 2 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -137,23 +133,18 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + #pragma omp target #pragma omp teams -#pragma omp parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp parallel for simd safelen (foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+7 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target #pragma omp teams // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}} // expected-error@+2 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'safelen' clause}} -// expected-error@+1 {{integral constant expression}} +// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; @@ -165,7 +156,7 @@ int main(int argc, char **argv) { argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+6 {{integral constant expression}} + // expected-error@+6 {{integral constant expression}} expected-note@+6 0+{{constant expression}} #else // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/for_collapse_messages.cpp b/clang/test/OpenMP/for_collapse_messages.cpp index 07974be8da967..07630ffaaed95 100644 --- a/clang/test/OpenMP/for_collapse_messages.cpp +++ b/clang/test/OpenMP/for_collapse_messages.cpp @@ -12,29 +12,27 @@ // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp for collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for collapse () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}} @@ -44,18 +42,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp for', but found only 1}} - // expected-error@+6 2 {{directive '#pragma omp for' cannot contain more than one 'collapse' clause}} - // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp for' cannot contain more than one 'collapse' clause}} + // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -81,16 +76,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp for', but found only 1}} #pragma omp for collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp for' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp for', but found only 1}} - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for collapse (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp for' cannot contain more than one 'collapse' clause}} // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp for collapse (foobool(argc)), collapse (true), collapse (-5) @@ -98,7 +87,7 @@ int main(int argc, char **argv) { #pragma omp for collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/for_ordered_clause.cpp b/clang/test/OpenMP/for_ordered_clause.cpp index 537e52b605110..d9dbb828f6452 100644 --- a/clang/test/OpenMP/for_ordered_clause.cpp +++ b/clang/test/OpenMP/for_ordered_clause.cpp @@ -5,22 +5,21 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s + +// expected-note@* 0+{{declared here}} // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp for ordered for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; @@ -30,9 +29,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here} #pragma omp for ordered() // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; -// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} -// expected-error@+2 2 {{integral constant expression}} -// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} +// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for ordered(argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; @@ -46,12 +44,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here} #pragma omp for ordered((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'ordered' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp for', but found only 1}} -// expected-error@+6 2 {{directive '#pragma omp for' cannot contain more than one 'ordered' clause}} -// expected-error@+5 {{argument to 'ordered' clause must be a strictly positive integer value}} -// expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L -// expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif +// expected-error@+3 2 {{directive '#pragma omp for' cannot contain more than one 'ordered' clause}} +// expected-error@+2 {{argument to 'ordered' clause must be a strictly positive integer value}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for ordered(foobool(argc)), ordered(true), ordered(-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; @@ -59,7 +54,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; #if __cplusplus <= 199711L -// expected-error@+4 2 {{integral constant expression}} +// expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -101,17 +96,11 @@ int main(int argc, char **argv) { #pragma omp for ordered(2 + 2)) // expected-warning {{extra tokens at the end of '#pragma omp for' are ignored}} expected-note {{as specified in 'ordered' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; // expected-error {{expected 4 for loops after '#pragma omp for', but found only 1}} -// expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L -// expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif +// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for ordered(foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; -// expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L -// expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif +// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp for' cannot contain more than one 'ordered' clause}} // expected-error@+1 {{argument to 'ordered' clause must be a strictly positive integer value}} #pragma omp for ordered(foobool(argc)), ordered(true), ordered(-5) @@ -121,7 +110,7 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; #if __cplusplus <= 199711L -// expected-error@+4 {{integral constant expression}} +// expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/for_simd_collapse_messages.cpp b/clang/test/OpenMP/for_simd_collapse_messages.cpp index efd9d39e4719d..d9f8a2d971518 100644 --- a/clang/test/OpenMP/for_simd_collapse_messages.cpp +++ b/clang/test/OpenMP/for_simd_collapse_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp for simd collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd collapse () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for simd collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp for simd', but found only 1}} - // expected-error@+6 2 {{directive '#pragma omp for simd' cannot contain more than one 'collapse' clause}} - // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp for simd' cannot contain more than one 'collapse' clause}} + // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for simd collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -75,16 +70,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp for simd', but found only 1}} #pragma omp for simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp for simd' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp for simd', but found only 1}} - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for simd collapse (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp for simd' cannot contain more than one 'collapse' clause}} // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp for simd collapse (foobool(argc)), collapse (true), collapse (-5) @@ -92,7 +81,7 @@ int main(int argc, char **argv) { #pragma omp for simd collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/for_simd_safelen_messages.cpp b/clang/test/OpenMP/for_simd_safelen_messages.cpp index f7142972d0a92..aaade5d181ad9 100644 --- a/clang/test/OpenMP/for_simd_safelen_messages.cpp +++ b/clang/test/OpenMP/for_simd_safelen_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp for simd safelen // expected-error {{expected '(' after 'safelen'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd safelen () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for simd safelen (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd safelen ((ST > 0) ? 
1 + ST : 2) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+6 2 {{directive '#pragma omp for simd' cannot contain more than one 'safelen' clause}} - // expected-error@+5 {{argument to 'safelen' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp for simd' cannot contain more than one 'safelen' clause}} + // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd safelen (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -73,16 +68,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp for simd safelen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp for simd' are ignored}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for simd safelen (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp for simd' cannot contain more than one 'safelen' clause}} // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}} #pragma omp for simd safelen (foobool(argc)), safelen (true), safelen (-5) @@ -90,7 +79,7 @@ int main(int argc, char **argv) { #pragma omp for simd safelen (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/for_simd_simdlen_messages.cpp b/clang/test/OpenMP/for_simd_simdlen_messages.cpp index eb0455e618f1a..fa270abb9d566 100644 --- a/clang/test/OpenMP/for_simd_simdlen_messages.cpp +++ b/clang/test/OpenMP/for_simd_simdlen_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp for simd simdlen // expected-error {{expected '(' after 'simdlen'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd simdlen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd simdlen () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for simd simdlen (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd simdlen ((ST > 0) ? 
1 + ST : 2) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+6 2 {{directive '#pragma omp for simd' cannot contain more than one 'simdlen' clause}} - // expected-error@+5 {{argument to 'simdlen' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp for simd' cannot contain more than one 'simdlen' clause}} + // expected-error@+2 {{argument to 'simdlen' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp for simd simdlen (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -73,16 +68,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp for simd simdlen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp for simd' are ignored}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp for simd simdlen (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp for simd' cannot contain more than one 'simdlen' clause}} // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}} #pragma omp for simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) @@ -90,7 +79,7 @@ int main(int argc, char **argv) { #pragma omp for simd simdlen (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/master_taskloop_collapse_messages.cpp b/clang/test/OpenMP/master_taskloop_collapse_messages.cpp index 99cc4263ebb7a..b6a20ee7382ad 100644 --- a/clang/test/OpenMP/master_taskloop_collapse_messages.cpp +++ b/clang/test/OpenMP/master_taskloop_collapse_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp master taskloop collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop collapse () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp master taskloop', but found only 1}} - // expected-error@+6 2 {{directive '#pragma omp master taskloop' cannot contain more than one 'collapse' clause}} - // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp master taskloop' cannot contain more than one 'collapse' clause}} + // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -75,16 +70,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} #pragma omp master taskloop collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop collapse (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp master taskloop' cannot contain more than one 'collapse' clause}} // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp master taskloop collapse (foobool(argc)), collapse (true), collapse (-5) @@ -92,7 +81,7 @@ int main(int argc, char **argv) { #pragma omp master taskloop collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/master_taskloop_simd_collapse_messages.cpp b/clang/test/OpenMP/master_taskloop_simd_collapse_messages.cpp index 787c50aade7bd..74d68bb341178 100644 --- a/clang/test/OpenMP/master_taskloop_simd_collapse_messages.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_collapse_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp master taskloop simd collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd collapse () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop simd collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp master taskloop simd', but found only 1}} - // expected-error@+6 2 {{directive '#pragma omp master taskloop simd' cannot contain more than one 'collapse' clause}} - // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp master taskloop simd' cannot contain more than one 'collapse' clause}} + // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop simd collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -75,16 +70,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp master taskloop simd', but found only 1}} #pragma omp master taskloop simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop simd' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp master taskloop simd', but found only 1}} - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop simd collapse (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp master taskloop simd' cannot contain more than one 'collapse' clause}} // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp master taskloop simd collapse (foobool(argc)), collapse (true), collapse (-5) @@ -92,7 +81,7 @@ int main(int argc, char **argv) { #pragma omp master taskloop simd collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/master_taskloop_simd_safelen_messages.cpp b/clang/test/OpenMP/master_taskloop_simd_safelen_messages.cpp index 9420cde083650..b92f463b4401d 100644 --- a/clang/test/OpenMP/master_taskloop_simd_safelen_messages.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_safelen_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp master taskloop simd safelen // expected-error {{expected '(' after 'safelen'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd safelen () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop simd safelen (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd safelen ((ST > 0) ? 
1 + ST : 2) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+6 2 {{directive '#pragma omp master taskloop simd' cannot contain more than one 'safelen' clause}} - // expected-error@+5 {{argument to 'safelen' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp master taskloop simd' cannot contain more than one 'safelen' clause}} + // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd safelen (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -73,16 +68,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp master taskloop simd safelen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop simd' are ignored}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop simd safelen (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp master taskloop simd' cannot contain more than one 'safelen' clause}} // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}} #pragma omp master taskloop simd safelen (foobool(argc)), safelen (true), safelen (-5) @@ -90,7 +79,7 @@ int main(int argc, char **argv) { #pragma omp master taskloop simd safelen (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/master_taskloop_simd_simdlen_messages.cpp b/clang/test/OpenMP/master_taskloop_simd_simdlen_messages.cpp index 36abee7b5cfec..5fc0b163ec850 100644 --- a/clang/test/OpenMP/master_taskloop_simd_simdlen_messages.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_simdlen_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp master taskloop simd simdlen // expected-error {{expected '(' after 'simdlen'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd simdlen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd simdlen () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop simd simdlen (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd simdlen ((ST > 0) ? 
1 + ST : 2) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+6 2 {{directive '#pragma omp master taskloop simd' cannot contain more than one 'simdlen' clause}} - // expected-error@+5 {{argument to 'simdlen' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp master taskloop simd' cannot contain more than one 'simdlen' clause}} + // expected-error@+2 {{argument to 'simdlen' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp master taskloop simd simdlen (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -73,16 +68,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp master taskloop simd simdlen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop simd' are ignored}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp master taskloop simd simdlen (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp master taskloop simd' cannot contain more than one 'simdlen' clause}} // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}} #pragma omp master taskloop simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) @@ -90,7 +79,7 @@ int main(int argc, char **argv) { #pragma omp master taskloop simd simdlen (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/parallel_for_collapse_messages.cpp b/clang/test/OpenMP/parallel_for_collapse_messages.cpp index ba48ce50178a0..5ad5628e97199 100644 --- a/clang/test/OpenMP/parallel_for_collapse_messages.cpp +++ b/clang/test/OpenMP/parallel_for_collapse_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp parallel for collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp parallel for collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp parallel for collapse () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp parallel for collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp parallel for collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp parallel for', but found only 1}} - // expected-error@+6 2 {{directive '#pragma omp parallel for' cannot contain more than one 'collapse' clause}} - // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp parallel for' cannot contain more than one 'collapse' clause}} + // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp parallel for collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp parallel for collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -75,16 +70,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp parallel for', but found only 1}} #pragma omp parallel for collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel for' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp parallel for', but found only 1}} - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp parallel for collapse (foobool(1) > 0 ? 
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+4{{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
   // expected-error@+2 2 {{directive '#pragma omp parallel for' cannot contain more than one 'collapse' clause}}
   // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp parallel for collapse (foobool(argc)), collapse (true), collapse (-5)
@@ -92,7 +81,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel for collapse (S1) // expected-error {{'S1' does not refer to a value}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
-  // expected-error@+4 {{integral constant expression}}
+  // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/parallel_for_ordered_messages.cpp b/clang/test/OpenMP/parallel_for_ordered_messages.cpp
index 7b46191fba928..3d6a8f267acb6 100644
--- a/clang/test/OpenMP/parallel_for_ordered_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_ordered_messages.cpp
@@ -6,20 +6,19 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
   return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp parallel for ordered
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
@@ -29,9 +28,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}
 #pragma omp parallel for ordered() // expected-error {{expected expression}}
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
-// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-// expected-error@+2 2 {{integral constant expression}}
-// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for ordered(argc
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
@@ -45,12 +43,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}
 #pragma omp parallel for ordered((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'ordered' clause}}
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp parallel for', but found only 1}}
-// expected-error@+6 2 {{directive '#pragma omp parallel for' cannot contain more than one 'ordered' clause}}
-// expected-error@+5 {{argument to 'ordered' clause must be a strictly positive integer value}}
-// expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-// expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+// expected-error@+3 2 {{directive '#pragma omp parallel for' cannot contain more than one 'ordered' clause}}
+// expected-error@+2 {{argument to 'ordered' clause must be a strictly positive integer value}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for ordered(foobool(argc)), ordered(true), ordered(-5)
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
@@ -58,7 +53,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
 #if __cplusplus <= 199711L
-// expected-error@+4 2 {{integral constant expression}}
+// expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -92,17 +87,11 @@ int main(int argc, char **argv) {
 #pragma omp parallel for ordered(2 + 2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel for' are ignored}} expected-note {{as specified in 'ordered' clause}}
   for (int i = 4; i < 12; i++)
     argv[0][i] = argv[0][i] - argv[0][i - 4]; // expected-error {{expected 4 for loops after '#pragma omp parallel for', but found only 1}}
-// expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-// expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for ordered(foobool(1) > 0 ? 1 : 2)
   for (int i = 4; i < 12; i++)
     argv[0][i] = argv[0][i] - argv[0][i - 4];
-// expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
-// expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
 // expected-error@+2 2 {{directive '#pragma omp parallel for' cannot contain more than one 'ordered' clause}}
 // expected-error@+1 {{argument to 'ordered' clause must be a strictly positive integer value}}
 #pragma omp parallel for ordered(foobool(argc)), ordered(true), ordered(-5)
@@ -112,7 +101,7 @@ int main(int argc, char **argv) {
   for (int i = 4; i < 12; i++)
     argv[0][i] = argv[0][i] - argv[0][i - 4];
 #if __cplusplus <= 199711L
-// expected-error@+4 {{integral constant expression}}
+// expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/parallel_for_simd_collapse_messages.cpp b/clang/test/OpenMP/parallel_for_simd_collapse_messages.cpp
index f04b1881db3bc..7c12ce827a4ad 100644
--- a/clang/test/OpenMP/parallel_for_simd_collapse_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_collapse_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
   return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp parallel for simd collapse // expected-error {{expected '(' after 'collapse'}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd collapse () // expected-error {{expected expression}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-  // expected-error@+2 2 {{integral constant expression}}
-  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for simd collapse (argc
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
   // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd collapse ((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp parallel for simd', but found only 1}}
-  // expected-error@+6 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'collapse' clause}}
-  // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}}
-  // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'collapse' clause}}
+  // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for simd collapse (foobool(argc)), collapse (true), collapse (-5)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd collapse (S) // expected-error {{'S' does not refer to a value}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
-  // expected-error@+4 2 {{integral constant expression}}
+  // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -75,16 +70,10 @@ int main(int argc, char **argv) {
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp parallel for simd', but found only 1}}
 #pragma omp parallel for simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel for simd' are ignored}} expected-note {{as specified in 'collapse' clause}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp parallel for simd', but found only 1}}
-  // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for simd collapse (foobool(1) > 0 ? 1 : 2)
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
   // expected-error@+2 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'collapse' clause}}
   // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp parallel for simd collapse (foobool(argc)), collapse (true), collapse (-5)
@@ -92,7 +81,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel for simd collapse (S1) // expected-error {{'S1' does not refer to a value}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
-  // expected-error@+4 {{integral constant expression}}
+  // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/parallel_for_simd_safelen_messages.cpp b/clang/test/OpenMP/parallel_for_simd_safelen_messages.cpp
index 29da9ff27a0be..e4facb1c93f0a 100644
--- a/clang/test/OpenMP/parallel_for_simd_safelen_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_safelen_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
   return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp parallel for simd safelen // expected-error {{expected '(' after 'safelen'}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd safelen () // expected-error {{expected expression}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-  // expected-error@+2 2 {{integral constant expression}}
-  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for simd safelen (argc
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
   // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd safelen ((ST > 0) ? 1 + ST : 2)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+6 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'safelen' clause}}
-  // expected-error@+5 {{argument to 'safelen' clause must be a strictly positive integer value}}
-  // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'safelen' clause}}
+  // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd safelen (S) // expected-error {{'S' does not refer to a value}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
-  // expected-error@+4 2 {{integral constant expression}}
+  // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -73,16 +68,10 @@ int main(int argc, char **argv) {
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #pragma omp parallel for simd safelen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel for simd' are ignored}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for simd safelen (foobool(1) > 0 ? 1 : 2)
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
   // expected-error@+2 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'safelen' clause}}
   // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}}
 #pragma omp parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5)
@@ -90,7 +79,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel for simd safelen (S1) // expected-error {{'S1' does not refer to a value}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
-  // expected-error@+4 {{integral constant expression}}
+  // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/parallel_for_simd_simdlen_messages.cpp b/clang/test/OpenMP/parallel_for_simd_simdlen_messages.cpp
index 8e0bf9ce6d7b8..3b225ab3e54f0 100644
--- a/clang/test/OpenMP/parallel_for_simd_simdlen_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_simdlen_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
   return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp parallel for simd simdlen // expected-error {{expected '(' after 'simdlen'}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd simdlen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd simdlen () // expected-error {{expected expression}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-  // expected-error@+2 2 {{integral constant expression}}
-  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for simd simdlen (argc
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
   // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd simdlen ((ST > 0) ? 1 + ST : 2)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+6 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'simdlen' clause}}
-  // expected-error@+5 {{argument to 'simdlen' clause must be a strictly positive integer value}}
-  // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'simdlen' clause}}
+  // expected-error@+2 {{argument to 'simdlen' clause must be a strictly positive integer value}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for simd simdlen (foobool(argc)), simdlen (true), simdlen (-5)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel for simd simdlen (S) // expected-error {{'S' does not refer to a value}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
-  // expected-error@+4 2 {{integral constant expression}}
+  // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -73,16 +68,10 @@ int main(int argc, char **argv) {
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #pragma omp parallel for simd simdlen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel for simd' are ignored}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel for simd simdlen (foobool(1) > 0 ? 1 : 2)
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
   // expected-error@+2 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'simdlen' clause}}
   // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}}
 #pragma omp parallel for simd simdlen (foobool(argc)), simdlen (true), simdlen (-5)
@@ -90,7 +79,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel for simd simdlen (S1) // expected-error {{'S1' does not refer to a value}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
-  // expected-error@+4 {{integral constant expression}}
+  // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/parallel_master_taskloop_collapse_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_collapse_messages.cpp
index d93222a4b5c76..7306271791918 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_collapse_messages.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_collapse_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
   return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp parallel master taskloop collapse // expected-error {{expected '(' after 'collapse'}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop collapse () // expected-error {{expected expression}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-  // expected-error@+2 2 {{integral constant expression}}
-  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop collapse (argc
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
   // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop collapse ((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp parallel master taskloop', but found only 1}}
-  // expected-error@+6 2 {{directive '#pragma omp parallel master taskloop' cannot contain more than one 'collapse' clause}}
-  // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}}
-  // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 2 {{directive '#pragma omp parallel master taskloop' cannot contain more than one 'collapse' clause}}
+  // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop collapse (foobool(argc)), collapse (true), collapse (-5)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop collapse (S) // expected-error {{'S' does not refer to a value}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
-  // expected-error@+4 2 {{integral constant expression}}
+  // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -75,16 +70,10 @@ int main(int argc, char **argv) {
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop', but found only 1}}
 #pragma omp parallel master taskloop collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop' are ignored}} expected-note {{as specified in 'collapse' clause}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop', but found only 1}}
-  // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop collapse (foobool(1) > 0 ? 1 : 2)
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
   // expected-error@+2 2 {{directive '#pragma omp parallel master taskloop' cannot contain more than one 'collapse' clause}}
   // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp parallel master taskloop collapse (foobool(argc)), collapse (true), collapse (-5)
@@ -92,7 +81,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel master taskloop collapse (S1) // expected-error {{'S1' does not refer to a value}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
-  // expected-error@+4 {{integral constant expression}}
+  // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_collapse_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_collapse_messages.cpp
index c97cec72b5149..e5b6d01f18b36 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_simd_collapse_messages.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_collapse_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
   return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp parallel master taskloop simd collapse // expected-error {{expected '(' after 'collapse'}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd collapse () // expected-error {{expected expression}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-  // expected-error@+2 2 {{integral constant expression}}
-  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop simd collapse (argc
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
   // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd collapse ((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
-  // expected-error@+6 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'collapse' clause}}
-  // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}}
-  // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'collapse' clause}}
+  // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop simd collapse (foobool(argc)), collapse (true), collapse (-5)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd collapse (S) // expected-error {{'S' does not refer to a value}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
-  // expected-error@+4 2 {{integral constant expression}}
+  // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -75,16 +70,10 @@ int main(int argc, char **argv) {
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
 #pragma omp parallel master taskloop simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}} expected-note {{as specified in 'collapse' clause}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
-  // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop simd collapse (foobool(1) > 0 ? 1 : 2)
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
   // expected-error@+2 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'collapse' clause}}
   // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp parallel master taskloop simd collapse (foobool(argc)), collapse (true), collapse (-5)
@@ -92,7 +81,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel master taskloop simd collapse (S1) // expected-error {{'S1' does not refer to a value}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
-  // expected-error@+4 {{integral constant expression}}
+  // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_safelen_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_safelen_messages.cpp
index 612a39f63412a..94173de545be4 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_simd_safelen_messages.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_safelen_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
   return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp parallel master taskloop simd safelen // expected-error {{expected '(' after 'safelen'}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd safelen () // expected-error {{expected expression}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-  // expected-error@+2 2 {{integral constant expression}}
-  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop simd safelen (argc
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
   // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd safelen ((ST > 0) ? 1 + ST : 2)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+6 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'safelen' clause}}
-  // expected-error@+5 {{argument to 'safelen' clause must be a strictly positive integer value}}
-  // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'safelen' clause}}
+  // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop simd safelen (foobool(argc)), safelen (true), safelen (-5)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd safelen (S) // expected-error {{'S' does not refer to a value}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
-  // expected-error@+4 2 {{integral constant expression}}
+  // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -73,16 +68,10 @@ int main(int argc, char **argv) {
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #pragma omp parallel master taskloop simd safelen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop simd safelen (foobool(1) > 0 ? 1 : 2)
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
   // expected-error@+2 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'safelen' clause}}
   // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}}
 #pragma omp parallel master taskloop simd safelen (foobool(argc)), safelen (true), safelen (-5)
@@ -90,7 +79,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel master taskloop simd safelen (S1) // expected-error {{'S1' does not refer to a value}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
-  // expected-error@+4 {{integral constant expression}}
+  // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_simdlen_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_simdlen_messages.cpp
index 16a543e13ada8..447dd8029c999 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_simd_simdlen_messages.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_simdlen_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
   return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp parallel master taskloop simd simdlen // expected-error {{expected '(' after 'simdlen'}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd simdlen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd simdlen () // expected-error {{expected expression}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-  // expected-error@+2 2 {{integral constant expression}}
-  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop simd simdlen (argc
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
   // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd simdlen ((ST > 0) ? 1 + ST : 2)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+6 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'simdlen' clause}}
-  // expected-error@+5 {{argument to 'simdlen' clause must be a strictly positive integer value}}
-  // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'simdlen' clause}}
+  // expected-error@+2 {{argument to 'simdlen' clause must be a strictly positive integer value}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop simd simdlen (foobool(argc)), simdlen (true), simdlen (-5)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp parallel master taskloop simd simdlen (S) // expected-error {{'S' does not refer to a value}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
-  // expected-error@+4 2 {{integral constant expression}}
+  // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -73,16 +68,10 @@ int main(int argc, char **argv) {
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #pragma omp parallel master taskloop simd simdlen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp parallel master taskloop simd simdlen (foobool(1) > 0 ? 1 : 2)
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
   // expected-error@+2 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'simdlen' clause}}
   // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}}
 #pragma omp parallel master taskloop simd simdlen (foobool(argc)), simdlen (true), simdlen (-5)
@@ -90,7 +79,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel master taskloop simd simdlen (S1) // expected-error {{'S1' does not refer to a value}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
-  // expected-error@+4 {{integral constant expression}}
+  // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/simd_collapse_messages.cpp b/clang/test/OpenMP/simd_collapse_messages.cpp
index 37915743b1298..1ce3bef3535ce 100644
--- a/clang/test/OpenMP/simd_collapse_messages.cpp
+++ b/clang/test/OpenMP/simd_collapse_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
   return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp simd collapse // expected-error {{expected '(' after 'collapse'}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp simd collapse () // expected-error {{expected expression}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-  // expected-error@+2 2 {{integral constant expression}}
-  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp simd collapse (argc
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
   // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp simd collapse ((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp simd', but found only 1}}
-  // expected-error@+6 2 {{directive '#pragma omp simd' cannot contain more than one 'collapse' clause}}
-  // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}}
-  // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 2 {{directive '#pragma omp simd' cannot contain more than one 'collapse' clause}}
+  // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp simd collapse (foobool(argc)), collapse (true), collapse (-5)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp simd collapse (S) // expected-error {{'S' does not refer to a value}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
-  // expected-error@+4 2 {{integral constant expression}}
+  // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -75,16 +70,10 @@ int main(int argc, char **argv) {
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp simd', but found only 1}}
 #pragma omp simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp simd' are ignored}} expected-note {{as specified in 'collapse' clause}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp simd', but found only 1}}
-  // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp simd collapse (foobool(1) > 0 ? 1 : 2)
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
   // expected-error@+2 2 {{directive '#pragma omp simd' cannot contain more than one 'collapse' clause}}
   // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp simd collapse (foobool(argc)), collapse (true), collapse (-5)
@@ -92,7 +81,7 @@ int main(int argc, char **argv) {
 #pragma omp simd collapse (S1) // expected-error {{'S1' does not refer to a value}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
-  // expected-error@+4 {{integral constant expression}}
+  // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/simd_safelen_messages.cpp b/clang/test/OpenMP/simd_safelen_messages.cpp
index 2cc5a88484489..4269534037464 100644
--- a/clang/test/OpenMP/simd_safelen_messages.cpp
+++ b/clang/test/OpenMP/simd_safelen_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
   return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp simd safelen // expected-error {{expected '(' after 'safelen'}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp simd safelen () // expected-error {{expected expression}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-  // expected-error@+2 2 {{integral constant expression}}
-  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp simd safelen (argc
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
   // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp simd safelen ((ST > 0) ? 1 + ST : 2)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
-  // expected-error@+6 2 {{directive '#pragma omp simd' cannot contain more than one 'safelen' clause}}
-  // expected-error@+5 {{argument to 'safelen' clause must be a strictly positive integer value}}
-  // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+3 2 {{directive '#pragma omp simd' cannot contain more than one 'safelen' clause}}
+  // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}}
+  // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp simd safelen (foobool(argc)), safelen (true), safelen (-5)
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp simd safelen (S) // expected-error {{'S' does not refer to a value}}
   for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
-  // expected-error@+4 2 {{integral constant expression}}
+  // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
   // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -73,16 +68,10 @@ int main(int argc, char **argv) {
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #pragma omp simd safelen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp simd' are ignored}}
   for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
-  // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+  // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp simd safelen (foobool(1) > 0 ? 1 : 2)
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp simd' cannot contain more than one 'safelen' clause}} // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}} #pragma omp simd safelen (foobool(argc)), safelen (true), safelen (-5) @@ -90,7 +79,7 @@ int main(int argc, char **argv) { #pragma omp simd safelen (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/simd_simdlen_messages.cpp b/clang/test/OpenMP/simd_simdlen_messages.cpp index d7f86e05f3a13..11dafdab9d96e 100644 --- a/clang/test/OpenMP/simd_simdlen_messages.cpp +++ b/clang/test/OpenMP/simd_simdlen_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp simd simdlen // expected-error {{expected '(' after 'simdlen'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp simd simdlen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp simd simdlen () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp simd simdlen (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp simd simdlen ((ST > 0) ? 
1 + ST : 2) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+6 2 {{directive '#pragma omp simd' cannot contain more than one 'simdlen' clause}} - // expected-error@+5 {{argument to 'simdlen' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp simd' cannot contain more than one 'simdlen' clause}} + // expected-error@+2 {{argument to 'simdlen' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp simd simdlen (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -73,16 +68,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp simd simdlen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp simd' are ignored}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp simd simdlen (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp simd' cannot contain more than one 'simdlen' clause}} // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}} #pragma omp simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) @@ -90,7 +79,7 @@ int main(int argc, char **argv) { #pragma omp simd simdlen (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/target_parallel_for_collapse_messages.cpp b/clang/test/OpenMP/target_parallel_for_collapse_messages.cpp index 0ee442cd443b6..d359d4e90d1d8 100644 --- a/clang/test/OpenMP/target_parallel_for_collapse_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_collapse_messages.cpp @@ -6,29 +6,28 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } bool foobool(int argc) { -#if __cplusplus >= 201103L -// expected-note@-2 4 {{declared here}} -#endif + return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target parallel for collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for collapse () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}} @@ -40,18 +39,16 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp target parallel for', but found only 1}} // expected-error@+3 2 {{directive '#pragma omp target parallel for' cannot contain more than one 'collapse' clause}} 
// expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+1 2 {{integral constant expression}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for collapse (foobool(argc)), collapse (true), collapse (-5) -#if __cplusplus >= 201103L -// expected-note@-2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus >= 201103L // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #else - // expected-error@+2 2 {{integral constant expression}} + // expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}} #endif #pragma omp target parallel for collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -75,25 +72,21 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp target parallel for', but found only 1}} #pragma omp target parallel for collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp target parallel for' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp target parallel for', but found only 1}} - #pragma omp target parallel for collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} -#if __cplusplus >= 201103L -// expected-note@-2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + #pragma omp target parallel for collapse (foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+3 {{integral constant expression}} + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp target parallel for' cannot contain more than one 'collapse' clause}} // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp target parallel for collapse (foobool(argc)), collapse (true), collapse (-5) -#if __cplusplus >= 201103L -// expected-note@-2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp target parallel for collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus >= 201103L // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #else - // expected-error@+2 {{integral constant expression}} + // expected-error@+2 {{integral constant expression}} expected-note@+2 0+{{constant expression}} #endif #pragma omp target parallel for collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; diff --git a/clang/test/OpenMP/target_parallel_for_ordered_messages.cpp b/clang/test/OpenMP/target_parallel_for_ordered_messages.cpp index 8396273d65d3c..188d9441d8c93 100644 --- a/clang/test/OpenMP/target_parallel_for_ordered_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_ordered_messages.cpp @@ -6,20 +6,20 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } bool foobool(int argc) { -#if __cplusplus >= 201103L -// expected-note@-2 4 {{declared here}} -#endif + return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target parallel for ordered for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; @@ -29,9 +29,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here} #pragma omp target parallel for ordered() // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; -// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} -// expected-error@+2 2 {{integral constant expression}} -// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} +// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for ordered(argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; @@ -45,12 +44,10 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here} #pragma omp target parallel for ordered((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'ordered' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp target parallel for', but found only 1}} -#if __cplusplus >= 201103L -// expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp target parallel for' cannot contain more than one 'ordered' clause}} // expected-error@+2 {{argument to 'ordered' clause must be a strictly positive integer value}} -// expected-error@+1 2 {{integral constant expression}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for ordered(foobool(argc)), ordered(true), ordered(-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; @@ -60,7 +57,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here} #if __cplusplus >= 201103L // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #else - // expected-error@+2 2 {{integral constant expression}} + // expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}} #endif #pragma omp target parallel for ordered(argv[1] = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) @@ -92,16 +89,12 @@ int main(int argc, char **argv) { #pragma omp target parallel for ordered(2 + 2)) // expected-warning {{extra tokens at the end of '#pragma omp target parallel for' are ignored}} expected-note {{as specified in 'ordered' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; // expected-error {{expected 4 for loops after '#pragma omp target parallel for', but found only 1}} -#if __cplusplus >= 201103L -// expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif -#pragma omp target parallel for ordered(foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} + +#pragma omp target parallel for ordered(foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; -#if __cplusplus >= 201103L -// expected-note@+5 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif -// expected-error@+3 {{integral constant expression}} + +// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp target parallel for' cannot contain more than one 'ordered' clause}} // expected-error@+1 {{argument to 'ordered' clause must be a strictly positive integer value}} #pragma omp target parallel for ordered(foobool(argc)), ordered(true), ordered(-5) @@ -113,7 +106,7 @@ int main(int argc, char **argv) { #if __cplusplus >= 201103L // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #else - // expected-error@+2 {{integral constant expression}} + // expected-error@+2 {{integral constant expression}} expected-note@+2 0+{{constant expression}} #endif #pragma omp target parallel for ordered(argv[1] = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = 4; i < 12; i++) diff --git a/clang/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp index 309ba48e73fdd..7acb2587f976f 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp @@ -6,30 +6,28 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L - // expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} - int j; // expected-note {{declared here}} +template +T tmain(T argc, S **argv) { + int j; #pragma omp target parallel for simd collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for simd collapse () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}} @@ -39,18 +37,16 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; 
#pragma omp target parallel for simd collapse ((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp target parallel for simd', but found only 1}} -#if __cplusplus >= 201103L - // expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'collapse' clause}} // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+1 2 {{integral constant expression}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for simd collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-note@+2 {{read of non-const variable 'j' is not allowed in a constant expression}} - // expected-error@+1 {{integral constant expression}} + + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd collapse (j=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for simd collapse (1) @@ -63,7 +59,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} } int main(int argc, char **argv) { - int j; // expected-note {{declared here}} + int j; #pragma omp target parallel for simd collapse // expected-error {{expected '(' after 'collapse'}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp target parallel for simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -75,22 +71,20 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp target parallel for simd' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp target parallel for simd', but found only 1}} #if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} + #endif - #pragma omp target parallel for simd collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} + #pragma omp target parallel for simd collapse (foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+5 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif - // expected-error@+3 {{integral constant expression}} + + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'collapse' clause}} // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp target parallel for simd collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp target parallel for simd collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-note@+2 {{read of non-const variable 'j' is not allowed in a constant expression}} - // expected-error@+1 {{integral constant expression}} + + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd collapse (j=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error@+3 {{statement after '#pragma omp target parallel for simd' must be a for loop}} diff --git a/clang/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp index ed3e065419785..8dd7f68c25fd8 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp @@ -6,21 +6,20 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L - // expected-note@+2 2 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} +template T tmain(T argc, S **argv) { - int j; // expected-note {{declared here}} + int j; #pragma omp target parallel for simd ordered for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; @@ -56,8 +55,8 @@ T tmain(T argc, S **argv) { #pragma omp target parallel for simd ordered(S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; -// expected-note@+2 {{read of non-const variable 'j' is not allowed in a constant expression}} -// expected-error@+1 {{integral constant expression}} + +// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd ordered(j = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; @@ -76,7 +75,7 @@ T tmain(T argc, S **argv) { } int main(int argc, char **argv) { - int j; // expected-note {{declared here}} + int j; #pragma omp target parallel for simd ordered for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; @@ -94,16 +93,12 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd ordered(2 + 2)) 
// expected-warning {{extra tokens at the end of '#pragma omp target parallel for simd' are ignored}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif -#pragma omp target parallel for simd ordered(foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} + +#pragma omp target parallel for simd ordered(foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; -#if __cplusplus >= 201103L - // expected-note@+5 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif -// expected-error@+3 {{integral constant expression}} + +// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'ordered' clause}} // expected-error@+1 {{argument to 'ordered' clause must be a strictly positive integer value}} #pragma omp target parallel for simd ordered(foobool(argc)), ordered(true), ordered(-5) @@ -112,8 +107,8 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd ordered(S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; -// expected-note@+2 {{read of non-const variable 'j' is not allowed in a constant expression}} -// expected-error@+1 {{integral constant expression}} + +// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd ordered(j = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; diff --git a/clang/test/OpenMP/target_parallel_for_simd_safelen_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_safelen_messages.cpp index f3d4104deda47..ef47d78caf549 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_safelen_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_safelen_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target parallel for simd safelen // expected-error {{expected '(' after 'safelen'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for simd safelen () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not 
allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd safelen (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for simd safelen ((ST > 0) ? 1 + ST : 2) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+6 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'safelen' clause}} - // expected-error@+5 {{argument to 'safelen' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'safelen' clause}} + // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for simd safelen (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -73,16 +68,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp target parallel for simd safelen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp target parallel for simd' are ignored}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd safelen (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'safelen' clause}} // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}} #pragma omp target parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) @@ -90,7 +79,7 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd safelen (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/target_parallel_for_simd_simdlen_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_simdlen_messages.cpp index 9d17d54ded7d2..7bf27e9dd9426 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_simdlen_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_simdlen_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target parallel for simd simdlen // expected-error {{expected '(' after 'simdlen'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -29,9 +28,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp target parallel for simd simdlen () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} -// expected-error@+2 2 {{integral constant expression}} -// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} +// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd simdlen (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -45,12 +43,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp target parallel for simd simdlen ((ST > 0) ? 
1 + ST : 2) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+6 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'simdlen' clause}} - // expected-error@+5 {{argument to 'simdlen' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'simdlen' clause}} + // expected-error@+2 {{argument to 'simdlen' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -58,7 +53,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -99,17 +94,11 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd simdlen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp target parallel for simd' are ignored}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd simdlen (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'simdlen' clause}} // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}} #pragma omp target parallel for simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) @@ -119,7 +108,7 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/target_simd_collapse_messages.cpp b/clang/test/OpenMP/target_simd_collapse_messages.cpp index 0040816c60c1a..00fa3c85279f2 100644 --- a/clang/test/OpenMP/target_simd_collapse_messages.cpp +++ b/clang/test/OpenMP/target_simd_collapse_messages.cpp @@ -4,30 +4,28 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} - int j; // expected-note {{declared here}} +template +T tmain(T argc, S **argv) { + int j; #pragma omp target simd collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target simd collapse () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target simd collapse (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}} @@ -37,18 +35,16 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target simd collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp target simd', but found only 1}} -#if __cplusplus >= 201103L - // expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp target simd' cannot contain more than one 'collapse' clause}} // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} - // expected-error@+1 2 {{integral constant expression}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target simd collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target simd collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-note@+2 {{read of non-const variable 'j' is not allowed in a constant expression}} - // expected-error@+1 {{integral constant expression}} + + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target simd collapse (j=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target simd collapse (1) @@ -61,7 +57,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} } int main(int argc, char **argv) { - int j; // expected-note {{declared here}} + int j; #pragma omp target simd collapse // expected-error {{expected '(' after 'collapse'}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp target simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -72,23 +68,19 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp target simd', but found only 1}} #pragma omp target simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp target simd' are ignored}} expected-note {{as specified in 'collapse' clause}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp target simd', but found only 1}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif - #pragma omp target simd collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} + + #pragma omp target simd collapse (foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+5 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif - // expected-error@+3 {{integral constant expression}} + + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp target simd' cannot contain more than one 'collapse' clause}} // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp target simd collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp target simd collapse (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-note@+2 {{read of non-const variable 'j' is not allowed in a constant expression}} - // expected-error@+1 {{integral constant expression}} + + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target simd collapse (j=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error@+3 {{statement after '#pragma omp target simd' must be a for loop}} diff --git a/clang/test/OpenMP/target_simd_safelen_messages.cpp b/clang/test/OpenMP/target_simd_safelen_messages.cpp index b64af78f72a25..926cb9f56d51b 100644 --- a/clang/test/OpenMP/target_simd_safelen_messages.cpp +++ b/clang/test/OpenMP/target_simd_safelen_messages.cpp @@ -6,29 +6,27 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target simd safelen // expected-error {{expected '(' after 'safelen'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target simd safelen () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} - // expected-error@+2 2 {{integral constant expression}} - // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target simd safelen (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}} @@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; 
i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target simd safelen ((ST > 0) ? 1 + ST : 2) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+6 2 {{directive '#pragma omp target simd' cannot contain more than one 'safelen' clause}} - // expected-error@+5 {{argument to 'safelen' clause must be a strictly positive integer value}} - // expected-error@+4 2 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 2 {{directive '#pragma omp target simd' cannot contain more than one 'safelen' clause}} + // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} + // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target simd safelen (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+4 2 {{integral constant expression}} + // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -73,16 +68,10 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #pragma omp target simd safelen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp target simd' are ignored}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+4 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target simd safelen (foobool(1) > 0 ? 
1 : 2) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; - // expected-error@+6 {{integral constant expression}} -#if __cplusplus >= 201103L - // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} // expected-error@+2 2 {{directive '#pragma omp target simd' cannot contain more than one 'safelen' clause}} // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}} #pragma omp target simd safelen (foobool(argc)), safelen (true), safelen (-5) @@ -90,7 +79,7 @@ int main(int argc, char **argv) { #pragma omp target simd safelen (S1) // expected-error {{'S1' does not refer to a value}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+4 {{integral constant expression}} + // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} #else // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/target_simd_simdlen_messages.cpp b/clang/test/OpenMP/target_simd_simdlen_messages.cpp index 30e8a31069e61..595f88fd351d6 100644 --- a/clang/test/OpenMP/target_simd_simdlen_messages.cpp +++ b/clang/test/OpenMP/target_simd_simdlen_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target simd simdlen // expected-error {{expected '(' after 'simdlen'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -29,9 +28,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp target simd simdlen () // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} -// expected-error@+2 2 {{integral constant expression}} -// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} +// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target simd simdlen (argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -45,12 +43,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #pragma omp target simd simdlen ((ST > 0) ? 
1 + ST : 2)
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
- // expected-error@+6 2 {{directive '#pragma omp target simd' cannot contain more than one 'simdlen' clause}}
- // expected-error@+5 {{argument to 'simdlen' clause must be a strictly positive integer value}}
- // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+3 2 {{directive '#pragma omp target simd' cannot contain more than one 'simdlen' clause}}
+ // expected-error@+2 {{argument to 'simdlen' clause must be a strictly positive integer value}}
+ // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target simd simdlen (foobool(argc)), simdlen (true), simdlen (-5)
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -58,7 +53,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -99,17 +94,11 @@ int main(int argc, char **argv) {
 #pragma omp target simd simdlen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp target simd' are ignored}}
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
- // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target simd simdlen (foobool(1) > 0 ? 1 : 2)
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
- // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
 // expected-error@+2 2 {{directive '#pragma omp target simd' cannot contain more than one 'simdlen' clause}}
 // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}}
 #pragma omp target simd simdlen (foobool(argc)), simdlen (true), simdlen (-5)
@@ -119,7 +108,7 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
- // expected-error@+4 {{integral constant expression}}
+ // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/target_teams_distribute_collapse_messages.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_messages.cpp
index 550f644b3f2fb..ef22b5f268d27 100644
--- a/clang/test/OpenMP/target_teams_distribute_collapse_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_collapse_messages.cpp
@@ -6,20 +6,19 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp target teams distribute collapse // expected-error {{expected '(' after 'collapse'}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -32,9 +31,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
-// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-// expected-error@+2 2 {{integral constant expression}}
-// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute collapse (argc
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -52,12 +50,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp target teams distribute', but found only 1}}
 
-#if __cplusplus >= 201103L
-// expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
 // expected-error@+3 2 {{directive '#pragma omp target teams distribute' cannot contain more than one 'collapse' clause}}
 // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}}
-// expected-error@+1 2 {{integral constant expression}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute collapse (foobool(argc)), collapse (true), collapse (-5)
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -67,7 +62,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -109,18 +104,12 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp target teams distribute', but found only 1}}
 
-// expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
-// expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute collapse (foobool(1) > 0 ? 1 : 2)
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
- // expected-note@+5{{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
-// expected-error@+3 {{integral constant expression}}
+// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
 // expected-error@+2 2 {{directive '#pragma omp target teams distribute' cannot contain more than one 'collapse' clause}}
 // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp target teams distribute collapse (foobool(argc)), collapse (true), collapse (-5)
@@ -132,7 +121,7 @@ int main(int argc, char **argv) {
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 {{integral constant expression}}
+ // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_messages.cpp
index 4af605c20b79f..13d1a789123bf 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_messages.cpp
@@ -6,20 +6,19 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++98 -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++11 -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp target teams distribute parallel for collapse // expected-error {{expected '(' after 'collapse'}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -32,9 +31,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
-// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-// expected-error@+2 2 {{integral constant expression}}
-// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute parallel for collapse (argc
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -52,12 +50,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp target teams distribute parallel for', but found only 1}}
 
-#if __cplusplus >= 201103L
-// expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used}}
-#endif
 // expected-error@+3 2 {{directive '#pragma omp target teams distribute parallel for' cannot contain more than one 'collapse' clause}}
 // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}}
-// expected-error@+1 2 {{integral constant expression}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute parallel for collapse (foobool(argc)), collapse (true), collapse (-5)
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -69,7 +64,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 #if __cplusplus >= 201103L
 // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type}}
 #else
-// expected-error@+2 2 {{integral constant expression}}
+// expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}}
 #endif
 #pragma omp target teams distribute parallel for collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
 for (int i = ST; i < N; i++)
@@ -109,17 +104,11 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp target teams distribute parallel for', but found only 1}}
 
-#if __cplusplus >= 201103L
-// expected-note@+2 {{non-constexpr function 'foobool' cannot be used}}
-#endif
-#pragma omp target teams distribute parallel for collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}}
+#pragma omp target teams distribute parallel for collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}}
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
-// expected-note@+5 {{non-constexpr function 'foobool' cannot be used}}
-#endif
-// expected-error@+3 {{integral constant expression}}
+// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
 // expected-error@+2 2 {{directive '#pragma omp target teams distribute parallel for' cannot contain more than one 'collapse' clause}}
 // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp target teams distribute parallel for collapse (foobool(argc)), collapse (true), collapse (-5)
@@ -133,7 +122,7 @@ int main(int argc, char **argv) {
 #if __cplusplus >= 201103L
 // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type}}
 #else
-// expected-error@+2 {{integral constant expression}}
+// expected-error@+2 {{integral constant expression}} expected-note@+2 0+{{constant expression}}
 #endif
 #pragma omp target teams distribute parallel for collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
 for (int i = 4; i < 12; i++)
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_messages.cpp
index 65be6e1507362..61508b5152b2c 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_messages.cpp
@@ -6,20 +6,19 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++98 -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++11 -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp target teams distribute parallel for simd collapse // expected-error {{expected '(' after 'collapse'}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -32,9 +31,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
-// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-// expected-error@+2 2 {{integral constant expression}}
-// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute parallel for simd collapse (argc
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -52,12 +50,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp target teams distribute parallel for simd', but found only 1}}
 
-#if __cplusplus >= 201103L
-// expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used}}
-#endif
 // expected-error@+3 2 {{directive '#pragma omp target teams distribute parallel for simd' cannot contain more than one 'collapse' clause}}
 // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}}
-// expected-error@+1 2 {{integral constant expression}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute parallel for simd collapse (foobool(argc)), collapse (true), collapse (-5)
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -69,7 +64,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 #if __cplusplus >= 201103L
 // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type}}
 #else
-// expected-error@+2 2 {{integral constant expression}}
+// expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}}
 #endif
 #pragma omp target teams distribute parallel for simd collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
 for (int i = ST; i < N; i++)
@@ -109,17 +104,11 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp target teams distribute parallel for simd', but found only 1}}
 
-#if __cplusplus >= 201103L
-// expected-note@+2 {{non-constexpr function 'foobool' cannot be used}}
-#endif
-#pragma omp target teams distribute parallel for simd collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}}
+#pragma omp target teams distribute parallel for simd collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}}
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
-// expected-note@+5 {{non-constexpr function 'foobool' cannot be used}}
-#endif
-// expected-error@+3 {{integral constant expression}}
+// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
 // expected-error@+2 2 {{directive '#pragma omp target teams distribute parallel for simd' cannot contain more than one 'collapse' clause}}
 // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp target teams distribute parallel for simd collapse (foobool(argc)), collapse (true), collapse (-5)
@@ -133,7 +122,7 @@ int main(int argc, char **argv) {
 #if __cplusplus >= 201103L
 // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type}}
 #else
-// expected-error@+2 {{integral constant expression}}
+// expected-error@+2 {{integral constant expression}} expected-note@+2 0+{{constant expression}}
 #endif
 #pragma omp target teams distribute parallel for simd collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
 for (int i = 4; i < 12; i++)
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_safelen_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_safelen_messages.cpp
index 5d0eaafb531b1..31dfb7c8024ef 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_safelen_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_safelen_messages.cpp
@@ -6,20 +6,19 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp target teams distribute parallel for simd safelen // expected-error {{expected '(' after 'safelen'}}
 for (int i = ST; i < N; i++)
@@ -32,7 +31,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 #pragma omp target teams distribute parallel for simd safelen () // expected-error {{expected expression}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
-#pragma omp target teams distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}}
+#pragma omp target teams distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
@@ -48,12 +47,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
-#if __cplusplus >= 201103L
- // expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
 // expected-error@+3 2 {{directive '#pragma omp target teams distribute parallel for simd' cannot contain more than one 'safelen' clause}}
 // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}}
-// expected-error@+1 2 {{integral constant expression}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5)
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -63,7 +59,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -103,19 +99,14 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
- // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
-#pragma omp target teams distribute parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}}
+
+#pragma omp target teams distribute parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}}
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
- // expected-note@+5 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
 // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}}
 // expected-error@+2 2 {{directive '#pragma omp target teams distribute parallel for simd' cannot contain more than one 'safelen' clause}}
-// expected-error@+1 {{integral constant expression}}
+// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5)
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
@@ -125,7 +116,7 @@ int main(int argc, char **argv) {
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 {{integral constant expression}}
+ // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_simdlen_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_simdlen_messages.cpp
index 2e0f86cfa9bff..14865e5931299 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_simdlen_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_simdlen_messages.cpp
@@ -6,20 +6,19 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp target teams distribute parallel for simd safelen // expected-error {{expected '(' after 'safelen'}}
 for (int i = ST; i < N; i++)
@@ -32,7 +31,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 #pragma omp target teams distribute parallel for simd safelen () // expected-error {{expected expression}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
-#pragma omp target teams distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}}
+#pragma omp target teams distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
@@ -48,12 +47,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
-#if __cplusplus >= 201103L
- // expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
 // expected-error@+3 2 {{directive '#pragma omp target teams distribute parallel for simd' cannot contain more than one 'safelen' clause}}
 // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}}
-// expected-error@+1 2 {{integral constant expression}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5)
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -63,7 +59,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -103,19 +99,13 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
- // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
-#pragma omp target teams distribute parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}}
+#pragma omp target teams distribute parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}}
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
- // expected-note@+5 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
 // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}}
 // expected-error@+2 2 {{directive '#pragma omp target teams distribute parallel for simd' cannot contain more than one 'safelen' clause}}
-// expected-error@+1 {{integral constant expression}}
+// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5)
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
@@ -125,7 +115,7 @@ int main(int argc, char **argv) {
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 {{integral constant expression}}
+ // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_collapse_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_collapse_messages.cpp
index 25e99614eb1f7..500980fbd2c4b 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_collapse_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_collapse_messages.cpp
@@ -6,20 +6,19 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp target teams distribute simd collapse // expected-error {{expected '(' after 'collapse'}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -32,9 +31,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
-// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-// expected-error@+2 2 {{integral constant expression}}
-// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute simd collapse (argc
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -52,12 +50,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp target teams distribute simd', but found only 1}}
 
-// expected-error@+6 2 {{directive '#pragma omp target teams distribute simd' cannot contain more than one 'collapse' clause}}
-// expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}}
-// expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
-// expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+// expected-error@+3 2 {{directive '#pragma omp target teams distribute simd' cannot contain more than one 'collapse' clause}}
+// expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute simd collapse (foobool(argc)), collapse (true), collapse (-5)
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -67,7 +62,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -109,19 +104,13 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp target teams distribute simd', but found only 1}}
 
-#if __cplusplus >= 201103L
- // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
-#pragma omp target teams distribute simd collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}}
+#pragma omp target teams distribute simd collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}}
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-// expected-error@+6 {{integral constant expression}}
-// expected-error@+5 2 {{directive '#pragma omp target teams distribute simd' cannot contain more than one 'collapse' clause}}
-// expected-error@+4 {{argument to 'collapse' clause must be a strictly positive integer value}}
-#if __cplusplus >= 201103L
-// expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
+// expected-error@+2 2 {{directive '#pragma omp target teams distribute simd' cannot contain more than one 'collapse' clause}}
+// expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp target teams distribute simd collapse (foobool(argc)), collapse (true), collapse (-5)
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
@@ -133,7 +122,7 @@ int main(int argc, char **argv) {
 #if __cplusplus >= 201103L
 // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #else
- // expected-error@+2 {{integral constant expression}}
+ // expected-error@+2 {{integral constant expression}} expected-note@+2 0+{{constant expression}}
 #endif
 #pragma omp target teams distribute simd collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
 for (int i = 4; i < 12; i++)
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_safelen_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_safelen_messages.cpp
index af055250d1719..a472b8eddee85 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_safelen_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_safelen_messages.cpp
@@ -6,20 +6,19 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp target teams distribute simd safelen // expected-error {{expected '(' after 'safelen'}}
 for (int i = ST; i < N; i++)
@@ -32,7 +31,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 #pragma omp target teams distribute simd safelen () // expected-error {{expected expression}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
-#pragma omp target teams distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}}
+#pragma omp target teams distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
@@ -48,12 +47,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
-#if __cplusplus >= 201103L
- // expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
 // expected-error@+3 2 {{directive '#pragma omp target teams distribute simd' cannot contain more than one 'safelen' clause}}
 // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}}
-// expected-error@+1 2 {{integral constant expression}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute simd safelen (foobool(argc)), safelen (true), safelen (-5)
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -63,7 +59,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -103,19 +99,14 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
- // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
-#pragma omp target teams distribute simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}}
+
+#pragma omp target teams distribute simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}}
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
- // expected-note@+5 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
 // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}}
 // expected-error@+2 2 {{directive '#pragma omp target teams distribute simd' cannot contain more than one 'safelen' clause}}
-// expected-error@+1 {{integral constant expression}}
+// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute simd safelen (foobool(argc)), safelen (true), safelen (-5)
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
@@ -125,7 +116,7 @@ int main(int argc, char **argv) {
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 {{integral constant expression}}
+ // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_simdlen_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_simdlen_messages.cpp
index f54da20babd89..489bb808db546 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_simdlen_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_simdlen_messages.cpp
@@ -6,20 +6,19 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp target teams distribute simd safelen // expected-error {{expected '(' after 'safelen'}}
 for (int i = ST; i < N; i++)
@@ -32,7 +31,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 #pragma omp target teams distribute simd safelen () // expected-error {{expected expression}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
-#pragma omp target teams distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}}
+#pragma omp target teams distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
@@ -48,12 +47,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
-#if __cplusplus >= 201103L
- // expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
 // expected-error@+3 2 {{directive '#pragma omp target teams distribute simd' cannot contain more than one 'safelen' clause}}
 // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}}
-// expected-error@+1 2 {{integral constant expression}}
+// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute simd safelen (foobool(argc)), safelen (true), safelen (-5)
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
@@ -63,7 +59,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -103,19 +99,14 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
- // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
-#pragma omp target teams distribute simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}}
+
+#pragma omp target teams distribute simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}}
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
-#if __cplusplus >= 201103L
- // expected-note@+5 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
 // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}}
 // expected-error@+2 2 {{directive '#pragma omp target teams distribute simd' cannot contain more than one 'safelen' clause}}
-// expected-error@+1 {{integral constant expression}}
+// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp target teams distribute simd safelen (foobool(argc)), safelen (true), safelen (-5)
 for (int i = 4; i < 12; i++)
 argv[0][i] = argv[0][i] - argv[0][i-4];
@@ -125,7 +116,7 @@ int main(int argc, char **argv) {
 argv[0][i] = argv[0][i] - argv[0][i-4];
 
 #if __cplusplus <= 199711L
- // expected-error@+4 {{integral constant expression}}
+ // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/taskloop_collapse_messages.cpp b/clang/test/OpenMP/taskloop_collapse_messages.cpp
index d004c929dd0de..b07f27a02ff5b 100644
--- a/clang/test/OpenMP/taskloop_collapse_messages.cpp
+++ b/clang/test/OpenMP/taskloop_collapse_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp taskloop collapse // expected-error {{expected '(' after 'collapse'}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop collapse () // expected-error {{expected expression}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
- // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
- // expected-error@+2 2 {{integral constant expression}}
- // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+ // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+ // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop collapse (argc
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop collapse ((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp taskloop', but found only 1}}
- // expected-error@+6 2 {{directive '#pragma omp taskloop' cannot contain more than one 'collapse' clause}}
- // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}}
- // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+3 2 {{directive '#pragma omp taskloop' cannot contain more than one 'collapse' clause}}
+ // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}}
+ // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop collapse (foobool(argc)), collapse (true), collapse (-5)
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop collapse (S) // expected-error {{'S' does not refer to a value}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -75,16 +70,10 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp taskloop', but found only 1}}
 #pragma omp taskloop collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp taskloop' are ignored}} expected-note {{as specified in 'collapse' clause}}
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp taskloop', but found only 1}}
- // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop collapse (foobool(1) > 0 ? 1 : 2)
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
- // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
 // expected-error@+2 2 {{directive '#pragma omp taskloop' cannot contain more than one 'collapse' clause}}
 // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp taskloop collapse (foobool(argc)), collapse (true), collapse (-5)
@@ -92,7 +81,7 @@ int main(int argc, char **argv) {
 #pragma omp taskloop collapse (S1) // expected-error {{'S1' does not refer to a value}}
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
- // expected-error@+4 {{integral constant expression}}
+ // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/taskloop_simd_collapse_messages.cpp b/clang/test/OpenMP/taskloop_simd_collapse_messages.cpp
index 485450966898c..59c87755e338c 100644
--- a/clang/test/OpenMP/taskloop_simd_collapse_messages.cpp
+++ b/clang/test/OpenMP/taskloop_simd_collapse_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp taskloop simd collapse // expected-error {{expected '(' after 'collapse'}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd collapse () // expected-error {{expected expression}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
- // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
- // expected-error@+2 2 {{integral constant expression}}
- // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+ // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+ // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop simd collapse (argc
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd collapse ((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp taskloop simd', but found only 1}}
- // expected-error@+6 2 {{directive '#pragma omp taskloop simd' cannot contain more than one 'collapse' clause}}
- // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}}
- // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+3 2 {{directive '#pragma omp taskloop simd' cannot contain more than one 'collapse' clause}}
+ // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}}
+ // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop simd collapse (foobool(argc)), collapse (true), collapse (-5)
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd collapse (S) // expected-error {{'S' does not refer to a value}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -75,16 +70,10 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp taskloop simd', but found only 1}}
 #pragma omp taskloop simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp taskloop simd' are ignored}} expected-note {{as specified in 'collapse' clause}}
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp taskloop simd', but found only 1}}
- // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop simd collapse (foobool(1) > 0 ? 1 : 2)
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
- // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
 // expected-error@+2 2 {{directive '#pragma omp taskloop simd' cannot contain more than one 'collapse' clause}}
 // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
 #pragma omp taskloop simd collapse (foobool(argc)), collapse (true), collapse (-5)
@@ -92,7 +81,7 @@ int main(int argc, char **argv) {
 #pragma omp taskloop simd collapse (S1) // expected-error {{'S1' does not refer to a value}}
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
- // expected-error@+4 {{integral constant expression}}
+ // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/taskloop_simd_safelen_messages.cpp b/clang/test/OpenMP/taskloop_simd_safelen_messages.cpp
index 491bc5089edab..4930de7eb90b6 100644
--- a/clang/test/OpenMP/taskloop_simd_safelen_messages.cpp
+++ b/clang/test/OpenMP/taskloop_simd_safelen_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp taskloop simd safelen // expected-error {{expected '(' after 'safelen'}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd safelen () // expected-error {{expected expression}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
- // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
- // expected-error@+2 2 {{integral constant expression}}
- // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+ // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+ // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop simd safelen (argc
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd safelen ((ST > 0) ? 1 + ST : 2)
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
- // expected-error@+6 2 {{directive '#pragma omp taskloop simd' cannot contain more than one 'safelen' clause}}
- // expected-error@+5 {{argument to 'safelen' clause must be a strictly positive integer value}}
- // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+3 2 {{directive '#pragma omp taskloop simd' cannot contain more than one 'safelen' clause}}
+ // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}}
+ // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop simd safelen (foobool(argc)), safelen (true), safelen (-5)
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd safelen (S) // expected-error {{'S' does not refer to a value}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -73,16 +68,10 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #pragma omp taskloop simd safelen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp taskloop simd' are ignored}}
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
- // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop simd safelen (foobool(1) > 0 ? 1 : 2)
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
- // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
 // expected-error@+2 2 {{directive '#pragma omp taskloop simd' cannot contain more than one 'safelen' clause}}
 // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}}
 #pragma omp taskloop simd safelen (foobool(argc)), safelen (true), safelen (-5)
@@ -90,7 +79,7 @@ int main(int argc, char **argv) {
 #pragma omp taskloop simd safelen (S1) // expected-error {{'S1' does not refer to a value}}
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
- // expected-error@+4 {{integral constant expression}}
+ // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/taskloop_simd_simdlen_messages.cpp b/clang/test/OpenMP/taskloop_simd_simdlen_messages.cpp
index cd935c9c1dec1..af8c553ea4e04 100644
--- a/clang/test/OpenMP/taskloop_simd_simdlen_messages.cpp
+++ b/clang/test/OpenMP/taskloop_simd_simdlen_messages.cpp
@@ -6,29 +6,27 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp taskloop simd simdlen // expected-error {{expected '(' after 'simdlen'}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd simdlen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd simdlen () // expected-error {{expected expression}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
- // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
- // expected-error@+2 2 {{integral constant expression}}
- // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+ // expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+ // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop simd simdlen (argc
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}}
@@ -38,18 +36,15 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd simdlen ((ST > 0) ? 1 + ST : 2)
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
- // expected-error@+6 2 {{directive '#pragma omp taskloop simd' cannot contain more than one 'simdlen' clause}}
- // expected-error@+5 {{argument to 'simdlen' clause must be a strictly positive integer value}}
- // expected-error@+4 2 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+3 2 {{directive '#pragma omp taskloop simd' cannot contain more than one 'simdlen' clause}}
+ // expected-error@+2 {{argument to 'simdlen' clause must be a strictly positive integer value}}
+ // expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop simd simdlen (foobool(argc)), simdlen (true), simdlen (-5)
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #pragma omp taskloop simd simdlen (S) // expected-error {{'S' does not refer to a value}}
 for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
 #if __cplusplus <= 199711L
- // expected-error@+4 2 {{integral constant expression}}
+ // expected-error@+4 2 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
@@ -73,16 +68,10 @@ int main(int argc, char **argv) {
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #pragma omp taskloop simd simdlen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp taskloop simd' are ignored}}
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
- // expected-error@+4 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}}
 #pragma omp taskloop simd simdlen (foobool(1) > 0 ? 1 : 2)
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
- // expected-error@+6 {{integral constant expression}}
-#if __cplusplus >= 201103L
- // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
+ // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}}
 // expected-error@+2 2 {{directive '#pragma omp taskloop simd' cannot contain more than one 'simdlen' clause}}
 // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}}
 #pragma omp taskloop simd simdlen (foobool(argc)), simdlen (true), simdlen (-5)
@@ -90,7 +79,7 @@ int main(int argc, char **argv) {
 #pragma omp taskloop simd simdlen (S1) // expected-error {{'S1' does not refer to a value}}
 for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
 #if __cplusplus <= 199711L
- // expected-error@+4 {{integral constant expression}}
+ // expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}}
 #else
 // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
 #endif
diff --git a/clang/test/OpenMP/teams_distribute_collapse_messages.cpp b/clang/test/OpenMP/teams_distribute_collapse_messages.cpp
index f6c9fd948b300..50d8ab2686d39 100644
--- a/clang/test/OpenMP/teams_distribute_collapse_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_collapse_messages.cpp
@@ -6,20 +6,19 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++98 -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++11 -Wuninitialized
 
+// expected-note@* 0+{{declared here}}
+
 void foo() {
 }
 
-#if __cplusplus >= 201103L
-// expected-note@+2 4 {{declared here}}
-#endif
 bool foobool(int argc) {
 return argc;
 }
 
-struct S1; // expected-note {{declared here}}
+struct S1;
 
-template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+template <class T, typename S, int N, int ST>
+T tmain(T argc, S **argv) {
 #pragma omp target
 #pragma omp teams distribute collapse // expected-error {{expected '(' after 'collapse'}}
 for (int i = ST; i < N; i++)
@@ -35,9 +34,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST];
 
-// expected-error@+4 {{expected ')'}} expected-note@+4 {{to match this '('}}
-// expected-error@+3 2 {{integral constant expression}}
-// expected-note@+2 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
+// expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}}
 #pragma omp target
 #pragma omp teams distribute collapse (argc
 for (int i = ST; i < N; i++)
@@ -59,12 +57,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
 for (int i = ST; i < N; i++)
 argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp teams distribute', but found only 1}}
 
-#if __cplusplus >= 201103L
-// expected-note@+6 2 {{non-constexpr function 'foobool' cannot be used}}
-#endif
 // expected-error@+4 2 {{directive '#pragma omp teams distribute' cannot contain more than one 'collapse' clause}}
 // expected-error@+3 {{argument to 'collapse' clause must be a strictly positive integer value}}
-// expected-error@+2 2 {{integral constant expression}}
+// expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}}
 #pragma 
omp target #pragma omp teams distribute collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) @@ -78,7 +73,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #if __cplusplus >= 201103L // expected-error@+5 2 {{integral constant expression must have integral or unscoped enumeration type}} #else -// expected-error@+3 2 {{integral constant expression}} +// expected-error@+3 2 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #endif #pragma omp target #pragma omp teams distribute collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -127,18 +122,12 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp teams distribute', but found only 1}} -#if __cplusplus >= 201103L -// expected-note@+3 {{non-constexpr function 'foobool' cannot be used}} -#endif #pragma omp target -#pragma omp teams distribute collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp teams distribute collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L -// expected-note@+6 {{non-constexpr function 'foobool' cannot be used}} -#endif -// expected-error@+4 {{integral constant expression}} +// expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} // expected-error@+3 2 {{directive '#pragma omp teams distribute' cannot contain more than one 'collapse' clause}} // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp target @@ -154,7 +143,7 @@ int main(int argc, char **argv) { #if __cplusplus >= 201103L // expected-error@+5 {{integral constant expression must have integral or unscoped enumeration type}} #else -// expected-error@+3 {{integral constant expression}} +// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #endif #pragma omp target #pragma omp teams distribute collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_messages.cpp index 1c83ac83785c3..1e7d7942b672b 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++98 -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++11 -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams distribute parallel for collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) @@ -35,9 +34,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -// expected-error@+4 {{expected ')'}} 
expected-note@+4 {{to match this '('}} -// expected-error@+3 2 {{integral constant expression}} -// expected-note@+2 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} +// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} +// expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}} #pragma omp target #pragma omp teams distribute parallel for collapse (argc for (int i = ST; i < N; i++) @@ -59,12 +57,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp teams distribute parallel for', but found only 1}} -#if __cplusplus >= 201103L -// expected-note@+6 2 {{non-constexpr function 'foobool' cannot be used}} -#endif // expected-error@+4 2 {{directive '#pragma omp teams distribute parallel for' cannot contain more than one 'collapse' clause}} // expected-error@+3 {{argument to 'collapse' clause must be a strictly positive integer value}} -// expected-error@+2 2 {{integral constant expression}} +// expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}} #pragma omp target #pragma omp teams distribute parallel for collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) @@ -77,7 +72,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #if __cplusplus >= 201103L // expected-error@+5 2 {{integral constant expression must have integral or unscoped enumeration type}} #else -// expected-error@+3 2 {{integral constant expression}} +// expected-error@+3 2 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #endif #pragma omp target #pragma omp teams distribute parallel for collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -126,18 +121,12 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp teams distribute parallel for', but found only 1}} -#if __cplusplus >= 201103L -// expected-note@+3 {{non-constexpr function 'foobool' cannot be used}} -#endif #pragma omp target -#pragma omp teams distribute parallel for collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp teams distribute parallel for collapse (foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L -// expected-note@+6 {{non-constexpr function 'foobool' cannot be used}} -#endif -// expected-error@+4 {{integral constant expression}} +// expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} // expected-error@+3 2 {{directive '#pragma omp teams distribute parallel for' cannot contain more than one 'collapse' clause}} // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp target @@ -153,7 +142,7 @@ int main(int argc, char **argv) { #if __cplusplus >= 201103L // expected-error@+5 {{integral constant expression must have integral or unscoped enumeration type}} #else -// expected-error@+3 {{integral constant expression}} +// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #endif #pragma omp target #pragma omp teams distribute parallel for collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_messages.cpp index 8558f1b31bde6..63132be20c9b7 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++98 -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++11 -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams distribute parallel for simd collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) @@ -35,9 +34,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -// expected-error@+4 {{expected ')'}} expected-note@+4 {{to match this '('}} -// expected-error@+3 2 {{integral constant expression}} -// expected-note@+2 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} +// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} +// expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}} #pragma omp target #pragma omp teams distribute parallel for simd collapse (argc for (int i = ST; i < N; i++) @@ -59,12 +57,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp teams distribute parallel for simd', but found only 1}} -#if __cplusplus >= 201103L -// expected-note@+6 2 {{non-constexpr function 'foobool' cannot be used}} -#endif // expected-error@+4 2 {{directive '#pragma omp teams distribute parallel for simd' cannot contain more than one 'collapse' clause}} // expected-error@+3 {{argument to 'collapse' clause must be a strictly positive integer value}} -// expected-error@+2 2 
{{integral constant expression}} +// expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}} #pragma omp target #pragma omp teams distribute parallel for simd collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) @@ -77,7 +72,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} #if __cplusplus >= 201103L // expected-error@+5 2 {{integral constant expression must have integral or unscoped enumeration type}} #else -// expected-error@+3 2 {{integral constant expression}} +// expected-error@+3 2 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #endif #pragma omp target #pragma omp teams distribute parallel for simd collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -126,18 +121,12 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp teams distribute parallel for simd', but found only 1}} -#if __cplusplus >= 201103L -// expected-note@+3 {{non-constexpr function 'foobool' cannot be used}} -#endif #pragma omp target -#pragma omp teams distribute parallel for simd collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp teams distribute parallel for simd collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L -// expected-note@+6 {{non-constexpr function 'foobool' cannot be used}} -#endif -// expected-error@+4 {{integral constant expression}} +// expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} // expected-error@+3 2 {{directive '#pragma omp teams distribute parallel for simd' cannot contain more than one 'collapse' clause}} // expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp target @@ -153,7 +142,7 @@ int main(int argc, char **argv) { #if __cplusplus >= 201103L // expected-error@+5 {{integral constant expression must have integral or unscoped enumeration type}} #else -// expected-error@+3 {{integral constant expression}} +// expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #endif #pragma omp target #pragma omp teams distribute parallel for simd collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_safelen_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_safelen_messages.cpp index 019427fe3f3e8..122b9cbe098b0 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_safelen_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_safelen_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams distribute parallel 
for simd safelen // expected-error {{expected '(' after 'safelen'}} @@ -36,7 +35,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target -#pragma omp teams distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}} +#pragma omp teams distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -55,13 +54,10 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -#if __cplusplus >= 201103L - // expected-note@+6 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target // expected-error@+3 2 {{directive '#pragma omp teams distribute parallel for simd' cannot contain more than one 'safelen' clause}} // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} -// expected-error@+1 2 {{integral constant expression}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp teams distribute parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -72,7 +68,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+5 2 {{integral constant expression}} + // expected-error@+5 2 {{integral constant expression}} expected-note@+5 0+{{constant expression}} #else // expected-error@+3 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -120,21 +116,16 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+3 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + #pragma omp target -#pragma omp teams distribute parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp teams distribute parallel for simd safelen (foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+6 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}} // expected-error@+2 2 {{directive '#pragma omp teams distribute parallel for simd' cannot contain more than one 'safelen' clause}} -// expected-error@+1 {{integral constant expression}} +// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp teams distribute parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; @@ -145,7 +136,7 @@ int main(int argc, char **argv) { argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+5 {{integral constant expression}} + // expected-error@+5 {{integral constant expression}} expected-note@+5 0+{{constant expression}} #else // expected-error@+3 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_simdlen_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_simdlen_messages.cpp index 019427fe3f3e8..122b9cbe098b0 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_simdlen_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_simdlen_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams distribute parallel for simd safelen // expected-error {{expected '(' after 'safelen'}} @@ -36,7 +35,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target -#pragma omp teams distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}} +#pragma omp teams distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -55,13 +54,10 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -#if __cplusplus >= 201103L - // expected-note@+6 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target // expected-error@+3 2 {{directive '#pragma omp teams distribute parallel for simd' cannot contain 
more than one 'safelen' clause}} // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} -// expected-error@+1 2 {{integral constant expression}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp teams distribute parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -72,7 +68,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+5 2 {{integral constant expression}} + // expected-error@+5 2 {{integral constant expression}} expected-note@+5 0+{{constant expression}} #else // expected-error@+3 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -120,21 +116,16 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+3 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + #pragma omp target -#pragma omp teams distribute parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp teams distribute parallel for simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+6 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}} // expected-error@+2 2 {{directive '#pragma omp teams distribute parallel for simd' cannot contain more than one 'safelen' clause}} -// expected-error@+1 {{integral constant expression}} +// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp teams distribute parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; @@ -145,7 +136,7 @@ int main(int argc, char **argv) { argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+5 {{integral constant expression}} + // expected-error@+5 {{integral constant expression}} expected-note@+5 0+{{constant expression}} #else // expected-error@+3 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/teams_distribute_simd_collapse_messages.cpp b/clang/test/OpenMP/teams_distribute_simd_collapse_messages.cpp index e7ce4d94386fa..bc1833d9f6052 100644 --- a/clang/test/OpenMP/teams_distribute_simd_collapse_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_collapse_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp 
teams distribute simd collapse // expected-error {{expected '(' after 'collapse'}} for (int i = ST; i < N; i++) @@ -35,9 +34,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -// expected-error@+4 {{expected ')'}} expected-note@+4 {{to match this '('}} -// expected-error@+3 2 {{integral constant expression}} -// expected-note@+2 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} +// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} +// expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}} #pragma omp target #pragma omp teams distribute simd collapse (argc for (int i = ST; i < N; i++) @@ -59,12 +57,9 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp teams distribute simd', but found only 1}} -// expected-error@+7 2 {{directive '#pragma omp teams distribute simd' cannot contain more than one 'collapse' clause}} -// expected-error@+6 {{argument to 'collapse' clause must be a strictly positive integer value}} -// expected-error@+5 2 {{integral constant expression}} -#if __cplusplus >= 201103L -// expected-note@+3 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif +// expected-error@+4 2 {{directive '#pragma omp teams distribute simd' cannot contain more than one 'collapse' clause}} +// expected-error@+3 {{argument to 'collapse' clause must be a strictly positive integer value}} +// expected-error@+2 2 {{integral constant expression}} expected-note@+2 0+{{constant expression}} #pragma omp target #pragma omp teams distribute simd collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = ST; i < N; i++) @@ -75,7 +70,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+5 2 {{integral constant expression}} + // expected-error@+5 2 {{integral constant expression}} expected-note@+5 0+{{constant expression}} #else // expected-error@+3 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -126,20 +121,14 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp teams distribute simd', but found only 1}} -#if __cplusplus >= 201103L - // expected-note@+3 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target -#pragma omp teams distribute simd collapse (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp teams distribute simd collapse (foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -// expected-error@+7 {{integral constant expression}} -// expected-error@+6 2 {{directive '#pragma omp teams distribute simd' cannot contain more than one 'collapse' clause}} -// expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}} -#if __cplusplus >= 201103L -// expected-note@+3 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif +// expected-error@+4 {{integral constant expression}} expected-note@+4 0+{{constant expression}} +// expected-error@+3 2 {{directive '#pragma omp teams distribute simd' cannot contain more than one 'collapse' clause}} +// expected-error@+2 {{argument to 'collapse' clause must be a strictly positive integer value}} #pragma omp target #pragma omp teams distribute simd collapse (foobool(argc)), collapse (true), collapse (-5) for (int i = 4; i < 12; i++) @@ -153,7 +142,7 @@ int main(int argc, char **argv) { #if __cplusplus >= 201103L // expected-error@+5 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #else - // expected-error@+3 {{integral constant expression}} + // expected-error@+3 {{integral constant expression}} expected-note@+3 0+{{constant expression}} #endif #pragma omp target #pragma omp teams distribute simd collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} diff --git a/clang/test/OpenMP/teams_distribute_simd_safelen_messages.cpp b/clang/test/OpenMP/teams_distribute_simd_safelen_messages.cpp index fe83ffdca020c..4efede29baee8 100644 --- a/clang/test/OpenMP/teams_distribute_simd_safelen_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_safelen_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams distribute simd safelen // expected-error {{expected '(' after 'safelen'}} @@ -36,7 +35,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target -#pragma omp teams distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}} +#pragma omp teams distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -55,13 +54,10 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -#if __cplusplus >= 201103L - // expected-note@+6 2 {{non-constexpr function 
'foobool' cannot be used in a constant expression}} -#endif #pragma omp target // expected-error@+3 2 {{directive '#pragma omp teams distribute simd' cannot contain more than one 'safelen' clause}} // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} -// expected-error@+1 2 {{integral constant expression}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp teams distribute simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -72,7 +68,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+5 2 {{integral constant expression}} + // expected-error@+5 2 {{integral constant expression}} expected-note@+5 0+{{constant expression}} #else // expected-error@+3 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -120,21 +116,16 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+3 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + #pragma omp target -#pragma omp teams distribute simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp teams distribute simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+6 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}} // expected-error@+2 2 {{directive '#pragma omp teams distribute simd' cannot contain more than one 'safelen' clause}} -// expected-error@+1 {{integral constant expression}} +// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp teams distribute simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; @@ -145,7 +136,7 @@ int main(int argc, char **argv) { argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+5 {{integral constant expression}} + // expected-error@+5 {{integral constant expression}} expected-note@+5 0+{{constant expression}} #else // expected-error@+3 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif diff --git a/clang/test/OpenMP/teams_distribute_simd_simdlen_messages.cpp b/clang/test/OpenMP/teams_distribute_simd_simdlen_messages.cpp index fe83ffdca020c..4efede29baee8 100644 --- a/clang/test/OpenMP/teams_distribute_simd_simdlen_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_simdlen_messages.cpp @@ -6,20 +6,19 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized // RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// expected-note@* 0+{{declared here}} + void foo() { } -#if __cplusplus >= 201103L -// expected-note@+2 4 {{declared here}} -#endif bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; -template // expected-note {{declared here}} -T tmain(T argc, S **argv) { 
//expected-note 2 {{declared here}} +template +T tmain(T argc, S **argv) { #pragma omp target #pragma omp teams distribute simd safelen // expected-error {{expected '(' after 'safelen'}} @@ -36,7 +35,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target -#pragma omp teams distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}} +#pragma omp teams distribute simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-note 0+{{constant expression}} expected-error {{expected ')'}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -55,13 +54,10 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -#if __cplusplus >= 201103L - // expected-note@+6 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target // expected-error@+3 2 {{directive '#pragma omp teams distribute simd' cannot contain more than one 'safelen' clause}} // expected-error@+2 {{argument to 'safelen' clause must be a strictly positive integer value}} -// expected-error@+1 2 {{integral constant expression}} +// expected-error@+1 2 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp teams distribute simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; @@ -72,7 +68,7 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} argv[0][i] = argv[0][i] - argv[0][i-ST]; #if __cplusplus <= 199711L - // expected-error@+5 2 {{integral constant expression}} + // expected-error@+5 2 {{integral constant expression}} expected-note@+5 0+{{constant expression}} #else // expected-error@+3 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif @@ -120,21 +116,16 @@ int main(int argc, char **argv) { for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+3 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif + #pragma omp target -#pragma omp teams distribute simd safelen (foobool(1) > 0 ? 1 : 2) // expected-error {{integral constant expression}} +#pragma omp teams distribute simd safelen (foobool(1) > 0 ? 
1 : 2) // expected-error {{integral constant expression}} expected-note 0+{{constant expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; -#if __cplusplus >= 201103L - // expected-note@+6 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif #pragma omp target // expected-error@+3 {{argument to 'safelen' clause must be a strictly positive integer value}} // expected-error@+2 2 {{directive '#pragma omp teams distribute simd' cannot contain more than one 'safelen' clause}} -// expected-error@+1 {{integral constant expression}} +// expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp teams distribute simd safelen (foobool(argc)), safelen (true), safelen (-5) for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; @@ -145,7 +136,7 @@ int main(int argc, char **argv) { argv[0][i] = argv[0][i] - argv[0][i-4]; #if __cplusplus <= 199711L - // expected-error@+5 {{integral constant expression}} + // expected-error@+5 {{integral constant expression}} expected-note@+5 0+{{constant expression}} #else // expected-error@+3 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} #endif From ded79be63555f4e5bfdb0db27ef22b71fe568474 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 5 Oct 2020 17:52:23 -0700 Subject: [PATCH 095/321] [c++17] Implement P0145R3 during constant evaluation. Ensure that we evaluate assignment and compound-assignment right-to-left, and array subscripting left-to-right. Fixes PR47724. --- clang/lib/AST/ExprConstant.cpp | 95 +++++++++------ .../SemaCXX/constant-expression-cxx1z.cpp | 109 ++++++++++++++++++ clang/www/cxx_status.html | 1 + 3 files changed, 169 insertions(+), 36 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 4460e3a17e6da..49ad01f275455 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1856,8 +1856,12 @@ void CallStackFrame::describe(raw_ostream &Out) { Out << ", "; const ParmVarDecl *Param = *I; - const APValue &Arg = Arguments[ArgIndex]; - Arg.printPretty(Out, Info.Ctx, Param->getType()); + if (Arguments) { + const APValue &Arg = Arguments[ArgIndex]; + Arg.printPretty(Out, Info.Ctx, Param->getType()); + } else { + Out << "<...>"; + } if (ArgIndex == 0 && IsMemberCall) Out << "->" << *Callee << '('; @@ -5792,6 +5796,8 @@ typedef SmallVector ArgVector; /// EvaluateArgs - Evaluate the arguments to a function call. static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, EvalInfo &Info, const FunctionDecl *Callee) { + ArgValues.resize(Args.size()); + bool Success = true; llvm::SmallBitVector ForbiddenNullArgs; if (Callee->hasAttr()) { @@ -5809,8 +5815,6 @@ static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, } } } - // FIXME: This is the wrong evaluation order for an assignment operator - // called via operator syntax. for (unsigned Idx = 0; Idx < Args.size(); Idx++) { if (!Evaluate(ArgValues[Idx], Info, Args[Idx])) { // If we're checking for a potential constant expression, evaluate all @@ -5834,17 +5838,13 @@ static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, /// Evaluate a function call. 
static bool HandleFunctionCall(SourceLocation CallLoc, const FunctionDecl *Callee, const LValue *This, - ArrayRef Args, const Stmt *Body, - EvalInfo &Info, APValue &Result, - const LValue *ResultSlot) { - ArgVector ArgValues(Args.size()); - if (!EvaluateArgs(Args, ArgValues, Info, Callee)) - return false; - + ArrayRef Args, APValue *ArgValues, + const Stmt *Body, EvalInfo &Info, + APValue &Result, const LValue *ResultSlot) { if (!Info.CheckCallLimit(CallLoc)) return false; - CallStackFrame Frame(Info, CallLoc, Callee, This, ArgValues.data()); + CallStackFrame Frame(Info, CallLoc, Callee, This, ArgValues); // For a trivial copy or move assignment, perform an APValue copy. This is // essential for unions, where the operations performed by the assignment @@ -7293,6 +7293,8 @@ class ExprEvaluatorBase auto Args = llvm::makeArrayRef(E->getArgs(), E->getNumArgs()); bool HasQualifier = false; + ArgVector ArgValues; + // Extract function decl and 'this' pointer from the callee. if (CalleeType->isSpecificBuiltinType(BuiltinType::BoundMember)) { const CXXMethodDecl *Member = nullptr; @@ -7341,6 +7343,22 @@ class ExprEvaluatorBase return Error(E); } + // For an (overloaded) assignment expression, evaluate the RHS before the + // LHS. + auto *OCE = dyn_cast(E); + if (OCE && OCE->isAssignmentOp()) { + assert(Args.size() == 2 && "wrong number of arguments in assignment"); + if (isa(FD)) { + // Args[0] is the object argument. + if (!EvaluateArgs({Args[1]}, ArgValues, Info, FD)) + return false; + } else { + if (!EvaluateArgs({Args[1], Args[0]}, ArgValues, Info, FD)) + return false; + std::swap(ArgValues[0], ArgValues[1]); + } + } + // Overloaded operator calls to member functions are represented as normal // calls with '*this' as the first argument. const CXXMethodDecl *MD = dyn_cast(FD); @@ -7403,6 +7421,11 @@ class ExprEvaluatorBase } else return Error(E); + // Evaluate the arguments now if we've not already done so. + if (ArgValues.empty() && !Args.empty() && + !EvaluateArgs(Args, ArgValues, Info, FD)) + return false; + SmallVector CovariantAdjustmentPath; if (This) { auto *NamedMember = dyn_cast(FD); @@ -7424,6 +7447,7 @@ class ExprEvaluatorBase // Destructor calls are different enough that they have their own codepath. if (auto *DD = dyn_cast(FD)) { assert(This && "no 'this' pointer for destructor call"); + assert(ArgValues.empty() && "unexpected destructor arguments"); return HandleDestruction(Info, E, *This, Info.Ctx.getRecordType(DD->getParent())); } @@ -7432,8 +7456,8 @@ class ExprEvaluatorBase Stmt *Body = FD->getBody(Definition); if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body) || - !HandleFunctionCall(E->getExprLoc(), Definition, This, Args, Body, Info, - Result, ResultSlot)) + !HandleFunctionCall(E->getExprLoc(), Definition, This, Args, + ArgValues.data(), Body, Info, Result, ResultSlot)) return false; if (!CovariantAdjustmentPath.empty() && @@ -8071,16 +8095,19 @@ bool LValueExprEvaluator::VisitArraySubscriptExpr(const ArraySubscriptExpr *E) { if (E->getBase()->getType()->isVectorType()) return Error(E); + APSInt Index; bool Success = true; - if (!evaluatePointer(E->getBase(), Result)) { - if (!Info.noteFailure()) - return false; - Success = false; - } - APSInt Index; - if (!EvaluateInteger(E->getIdx(), Index, Info)) - return false; + // C++17's rules require us to evaluate the LHS first, regardless of which + // side is the base. + for (const Expr *SubExpr : {E->getLHS(), E->getRHS()}) { + if (SubExpr == E->getBase() ? 
!evaluatePointer(SubExpr, Result) + : !EvaluateInteger(SubExpr, Index, Info)) { + if (!Info.noteFailure()) + return false; + Success = false; + } + } return Success && HandleLValueArrayAdjustment(Info, E, Result, E->getType(), Index); @@ -8125,7 +8152,10 @@ bool LValueExprEvaluator::VisitCompoundAssignOperator( if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(CAO); + // C++17 onwards require that we evaluate the RHS first. APValue RHS; + if (!Evaluate(RHS, this->Info, CAO->getRHS())) + return false; // The overall lvalue result is the result of evaluating the LHS. if (!this->Visit(CAO->getLHS())) { @@ -8134,9 +8164,6 @@ bool LValueExprEvaluator::VisitCompoundAssignOperator( return false; } - if (!Evaluate(RHS, this->Info, CAO->getRHS())) - return false; - return handleCompoundAssignment( this->Info, CAO, Result, CAO->getLHS()->getType(), CAO->getComputationLHSType(), @@ -8147,7 +8174,10 @@ bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(E); + // C++17 onwards require that we evaluate the RHS first. APValue NewVal; + if (!Evaluate(NewVal, this->Info, E->getRHS())) + return false; if (!this->Visit(E->getLHS())) { if (Info.noteFailure()) @@ -8155,9 +8185,6 @@ bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) { return false; } - if (!Evaluate(NewVal, this->Info, E->getRHS())) - return false; - if (Info.getLangOpts().CPlusPlus20 && !HandleUnionActiveMemberChange(Info, E->getLHS(), Result)) return false; @@ -15270,7 +15297,8 @@ bool Expr::isPotentialConstantExpr(const FunctionDecl *FD, } else { SourceLocation Loc = FD->getLocation(); HandleFunctionCall(Loc, FD, (MD && MD->isInstance()) ? &This : nullptr, - Args, FD->getBody(), Info, Scratch, nullptr); + Args, /*ArgValues*/ nullptr, FD->getBody(), Info, + Scratch, nullptr); } return Diags.empty(); @@ -15292,13 +15320,8 @@ bool Expr::isPotentialConstantExprUnevaluated(Expr *E, Info.CheckingPotentialConstantExpression = true; // Fabricate a call stack frame to give the arguments a plausible cover story. - ArrayRef Args; - ArgVector ArgValues(0); - bool Success = EvaluateArgs(Args, ArgValues, Info, FD); - (void)Success; - assert(Success && - "Failed to set up arguments for potential constant evaluation"); - CallStackFrame Frame(Info, SourceLocation(), FD, nullptr, ArgValues.data()); + CallStackFrame Frame(Info, SourceLocation(), FD, /*This*/ nullptr, + /*ArgValues*/ nullptr); APValue ResultScratch; Evaluate(ResultScratch, Info, E); diff --git a/clang/test/SemaCXX/constant-expression-cxx1z.cpp b/clang/test/SemaCXX/constant-expression-cxx1z.cpp index 2b366adf2e914..7770e92c63310 100644 --- a/clang/test/SemaCXX/constant-expression-cxx1z.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx1z.cpp @@ -59,3 +59,112 @@ void test() { else if constexpr (v) {} } } + +// Check that assignment operators evaluate their operands right-to-left. 
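+//
+// A hedged editorial illustration (not part of the original test, assuming a
+// C++17 compiler) of the guarantee exercised below. Before C++17 this
+// assignment was unsequenced; under P0145R3 the RHS is evaluated first:
+//
+//   int i = 0;
+//   int arr[2] = {};
+//   arr[i] = ++i; // '++i' runs first, so this stores 1 to arr[1], not arr[0]
+//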
+namespace EvalOrder { + template struct lvalue { + T t; + constexpr T &get() { return t; } + }; + + struct UserDefined { + int n = 0; + constexpr UserDefined &operator=(const UserDefined&) { return *this; } + constexpr UserDefined &operator+=(const UserDefined&) { return *this; } + constexpr void operator<<(const UserDefined&) const {} + constexpr void operator>>(const UserDefined&) const {} + constexpr void operator+(const UserDefined&) const {} + constexpr void operator[](int) const {} + }; + constexpr UserDefined ud; + + struct NonMember {}; + constexpr void operator+=(NonMember, NonMember) {} + constexpr void operator<<(NonMember, NonMember) {} + constexpr void operator>>(NonMember, NonMember) {} + constexpr void operator+(NonMember, NonMember) {} + constexpr NonMember nm; + + constexpr void f(...) {} + + // Helper to ensure that 'a' is evaluated before 'b'. + struct seq_checker { + bool done_a = false; + bool done_b = false; + + template constexpr T &&a(T &&v) { + done_a = true; + return (T &&)v; + } + template constexpr T &&b(T &&v) { + if (!done_a) + throw "wrong"; + done_b = true; + return (T &&)v; + } + + constexpr bool ok() { return done_a && done_b; } + }; + + // SEQ(expr), where part of the expression is tagged A(...) and part is + // tagged B(...), checks that A is evaluated before B. + #define A sc.a + #define B sc.b + #define SEQ(...) static_assert([](seq_checker sc) { void(__VA_ARGS__); return sc.ok(); }({})) + + // Longstanding sequencing rules. + SEQ((A(1), B(2))); + SEQ((A(true) ? B(2) : throw "huh?")); + SEQ((A(false) ? throw "huh?" : B(2))); + SEQ(A(true) && B(true)); + SEQ(A(false) || B(true)); + + // From P0145R3: + + // Rules 1 and 2 have no effect ('b' is not an expression). + + // Rule 3: a->*b + SEQ(A(ud).*B(&UserDefined::n)); + SEQ(A(&ud)->*B(&UserDefined::n)); + + // Rule 4: a(b1, b2, b3) + SEQ(A(f)(B(1), B(2), B(3))); + + // Rule 5: b = a, b @= a + SEQ(B(lvalue().get()) = A(0)); + SEQ(B(lvalue().get()) = A(ud)); + SEQ(B(lvalue().get()) += A(0)); + SEQ(B(lvalue().get()) += A(ud)); + SEQ(B(lvalue().get()) += A(nm)); + + // Rule 6: a[b] + constexpr int arr[3] = {}; + SEQ(A(arr)[B(0)]); + SEQ(A(+arr)[B(0)]); + SEQ(A(0)[B(arr)]); + SEQ(A(0)[B(+arr)]); + SEQ(A(ud)[B(0)]); + + // Rule 7: a << b + SEQ(A(1) << B(2)); + SEQ(A(ud) << B(ud)); + SEQ(A(nm) << B(nm)); + + // Rule 8: a >> b + SEQ(A(1) >> B(2)); + SEQ(A(ud) >> B(ud)); + SEQ(A(nm) >> B(nm)); + + // No particular order of evaluation is specified in other cases, but we in + // practice evaluate left-to-right. + // FIXME: Technically we're expected to check for undefined behavior due to + // unsequenced read and modification and treat it as non-constant due to UB. + SEQ(A(1) + B(2)); + SEQ(A(ud) + B(ud)); + SEQ(A(nm) + B(nm)); + SEQ(f(A(1), B(2))); + + #undef SEQ + #undef A + #undef B +} diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 3c546eb409dee..9c39e396edd48 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -807,6 +807,7 @@
C++17 implementation status
operator&&, operator||, and operator, functions using expression syntax are no longer guaranteed to be destroyed in reverse construction order in that ABI. +This is not fully supported during constant expression evaluation until Clang 12.
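<!-- A hedged editorial illustration (not part of the original page), assuming
     Clang 12 or later: during constant evaluation, the right-hand side of a
     (compound) assignment is now evaluated before the left-hand side, so the
     following evaluates per the C++17 rules:

       constexpr int g() {
         int i = 0;
         int a[2] = {};
         a[i] += ++i; // RHS first: i becomes 1, then a[1] += 1
         return a[1];
       }
       static_assert(g() == 1, "");
-->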
(10): Despite being the resolution to a Defect Report, this feature is disabled by default in all language versions, and can be enabled From f22496a9f4cabb97e735314b62731fedb2e01e50 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 5 Oct 2020 16:35:23 -0700 Subject: [PATCH 096/321] [dotest] Simplify logic to find the Python path Simplify the logic of parsing the lldb -P output to find the python path. This removes the special handling for the LLDB.framework case and instead of pattern matching known errors focus on finding a directory path that contains an __init__.py. Differential revision: https://reviews.llvm.org/D88840 --- lldb/packages/Python/lldbsuite/test/dotest.py | 92 ++++++------------- 1 file changed, 30 insertions(+), 62 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index b4eddda914033..922d7c9377ee2 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -545,68 +545,33 @@ def setupSysPath(): configuration.skip_categories.append("lldb-vscode") lldbPythonDir = None # The directory that contains 'lldb/__init__.py' - if configuration.lldb_framework_path: - lldbtest_config.lldb_framework_path = configuration.lldb_framework_path - candidatePath = os.path.join( - configuration.lldb_framework_path, 'Resources', 'Python') - if os.path.isfile(os.path.join(candidatePath, 'lldb/__init__.py')): - lldbPythonDir = candidatePath - if not lldbPythonDir: - print( - 'Resources/Python/lldb/__init__.py was not found in ' + - configuration.lldb_framework_path) - sys.exit(-1) - else: - # If our lldb supports the -P option, use it to find the python path: - init_in_python_dir = os.path.join('lldb', '__init__.py') - - lldb_dash_p_result = subprocess.check_output( - [lldbtest_config.lldbExec, "-P"], stderr=subprocess.STDOUT, universal_newlines=True) - - if lldb_dash_p_result and not lldb_dash_p_result.startswith( - ("<", "lldb: invalid option:")) and not lldb_dash_p_result.startswith("Traceback"): - lines = lldb_dash_p_result.splitlines() - - # Workaround for readline vs libedit issue on FreeBSD. If stdout - # is not a terminal Python executes - # rl_variable_bind ("enable-meta-key", "off"); - # This produces a warning with FreeBSD's libedit because the - # enable-meta-key variable is unknown. Not an issue on Apple - # because cpython commit f0ab6f9f0603 added a #ifndef __APPLE__ - # around the call. See http://bugs.python.org/issue19884 for more - # information. For now we just discard the warning output. - if len(lines) >= 1 and lines[0].startswith( - "bind: Invalid command"): - lines.pop(0) - - # Taking the last line because lldb outputs - # 'Cannot read termcap database;\nusing dumb terminal settings.\n' - # before the path - if len(lines) >= 1 and os.path.isfile( - os.path.join(lines[-1], init_in_python_dir)): - lldbPythonDir = lines[-1] - if "freebsd" in sys.platform or "linux" in sys.platform: - os.environ['LLDB_LIB_DIR'] = os.path.join( - lldbPythonDir, '..', '..') - - if not lldbPythonDir: - print( - "Unable to load lldb extension module. Possible reasons for this include:") - print(" 1) LLDB was built with LLDB_ENABLE_PYTHON=0") - print( - " 2) PYTHONPATH and PYTHONHOME are not set correctly. 
PYTHONHOME should refer to") - print( - " the version of Python that LLDB built and linked against, and PYTHONPATH") - print( - " should contain the Lib directory for the same python distro, as well as the") - print(" location of LLDB\'s site-packages folder.") - print( - " 3) A different version of Python than that which was built against is exported in") - print(" the system\'s PATH environment variable, causing conflicts.") - print( - " 4) The executable '%s' could not be found. Please check " % - lldbtest_config.lldbExec) - print(" that it exists and is executable.") + + # If our lldb supports the -P option, use it to find the python path: + lldb_dash_p_result = subprocess.check_output([lldbtest_config.lldbExec, "-P"], universal_newlines=True) + if lldb_dash_p_result: + for line in lldb_dash_p_result.splitlines(): + if os.path.isdir(line) and os.path.exists(os.path.join(line, 'lldb', '__init__.py')): + lldbPythonDir = line + break + + if not lldbPythonDir: + print( + "Unable to load lldb extension module. Possible reasons for this include:") + print(" 1) LLDB was built with LLDB_ENABLE_PYTHON=0") + print( + " 2) PYTHONPATH and PYTHONHOME are not set correctly. PYTHONHOME should refer to") + print( + " the version of Python that LLDB built and linked against, and PYTHONPATH") + print( + " should contain the Lib directory for the same python distro, as well as the") + print(" location of LLDB\'s site-packages folder.") + print( + " 3) A different version of Python than that which was built against is exported in") + print(" the system\'s PATH environment variable, causing conflicts.") + print( + " 4) The executable '%s' could not be found. Please check " % + lldbtest_config.lldbExec) + print(" that it exists and is executable.") if lldbPythonDir: lldbPythonDir = os.path.normpath(lldbPythonDir) @@ -620,6 +585,9 @@ def setupSysPath(): lldbPythonDir = os.path.abspath(lldbPythonDir) + if "freebsd" in sys.platform or "linux" in sys.platform: + os.environ['LLDB_LIB_DIR'] = os.path.join(lldbPythonDir, '..', '..') + # If tests need to find LLDB_FRAMEWORK, now they can do it os.environ["LLDB_FRAMEWORK"] = os.path.dirname( os.path.dirname(lldbPythonDir)) From b9888980132e5511e85d4172a46e02475957298b Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Fri, 2 Oct 2020 10:40:52 +0700 Subject: [PATCH 097/321] [GVN LoadPRE] Extend the scope of optimization by using context to prove safety of speculation Use context to prove that load can be safely executed at a point where load is being hoisted. Postpone the decision about safety of speculative load execution till the moment we know where we hoist load and check safety at that context. Reviewers: nikic, fhahn, mkazantsev, lebedev.ri, efriedma, reames Reviewed By: reames, mkazantsev Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D88725 --- llvm/lib/Transforms/Scalar/GVN.cpp | 21 ++++++++--- llvm/test/Transforms/GVN/loadpre-context.ll | 39 ++++++++++++--------- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index c25fdd44dcf9f..4cb95425678c4 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1133,7 +1133,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // backwards through predecessors if needed. 
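  // Note: instead of bailing out up front when the load is not trivially
  // speculatable, the code below only records that speculation safety must
  // still be proven once the concrete insertion points are known.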
BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; - bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI); // Check that there is no implicit control flow instructions above our load in // its block. If there is an instruction that doesn't always pass the @@ -1150,8 +1149,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // because if the index is out of bounds we should deoptimize rather than // access the array. // Check that there is no guard in this block above our instruction. - if (!IsSafeToSpeculativelyExecute && ICF->isDominatedByICFIFromSameBlock(LI)) - return false; + bool MustEnsureSafetyOfSpeculativeExecution = + ICF->isDominatedByICFIFromSameBlock(LI); + while (TmpBB->getSinglePredecessor()) { TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. @@ -1168,8 +1168,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return false; // Check that there is no implicit control flow in a block above. - if (!IsSafeToSpeculativelyExecute && ICF->hasICF(TmpBB)) - return false; + MustEnsureSafetyOfSpeculativeExecution = + MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB); } assert(TmpBB); @@ -1241,6 +1241,17 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (NumUnavailablePreds != 1) return false; + // Now we know where we will insert load. We must ensure that it is safe + // to speculatively execute the load at that points. + if (MustEnsureSafetyOfSpeculativeExecution) { + if (CriticalEdgePred.size()) + if (!isSafeToSpeculativelyExecute(LI, LoadBB->getFirstNonPHI(), DT)) + return false; + for (auto &PL : PredLoads) + if (!isSafeToSpeculativelyExecute(LI, PL.first->getTerminator(), DT)) + return false; + } + // Split critical edges, and update the unavailable predecessors accordingly. for (BasicBlock *OrigPred : CriticalEdgePred) { BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); diff --git a/llvm/test/Transforms/GVN/loadpre-context.ll b/llvm/test/Transforms/GVN/loadpre-context.ll index 8c9c212128429..50a43b0e91f85 100644 --- a/llvm/test/Transforms/GVN/loadpre-context.ll +++ b/llvm/test/Transforms/GVN/loadpre-context.ll @@ -1,18 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -gvn --basic-aa -S | FileCheck %s -; load may be speculated, adress is not null using context search. +; load may be speculated, address is not null using context search. ; There is a critical edge. 
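; (Illustrative note: the loop is only entered on the false edge of the null
; check, so at the hoist point %arg is known non-null and dereferenceable;
; splitting the critical edge then gives PRE a legal insertion point.)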
define i32 @loadpre_critical_edge(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) { ; CHECK-LABEL: @loadpre_critical_edge( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null -; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[HEADER:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[ENTRY_HEADER_CRIT_EDGE:%.*]] +; CHECK: entry.header_crit_edge: +; CHECK-NEXT: [[V_PRE:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ] -; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]]) -; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ARG]], align 4 -; CHECK-NEXT: [[SUM:%.*]] = add i32 [[NEW_V]], [[V]] +; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[V_PRE]], [[ENTRY_HEADER_CRIT_EDGE]] ], [ [[SUM:%.*]], [[HEADER]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_HEADER_CRIT_EDGE]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ] +; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @ro_foo(i32 [[IV]]) +; CHECK-NEXT: [[SUM]] = add i32 [[NEW_V]], [[V]] ; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]] @@ -28,7 +31,8 @@ entry: header: %iv = phi i32 [0, %entry], [%iv.next, %header] - %new_v = call i32 @foo(i32 %iv) +; Call prevents to move load over due to it does not guarantee to return. + %new_v = call i32 @ro_foo(i32 %iv) readnone %v = load i32, i32* %arg %sum = add i32 %new_v, %v store i32 %sum, i32* %arg @@ -43,19 +47,20 @@ null_exit: ret i32 0 } -; load may be speculated, adress is not null using context search. +; load may be speculated, address is not null using context search. define i32 @loadpre_basic(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) { ; CHECK-LABEL: @loadpre_basic( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null ; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[PREHEADER:%.*]] ; CHECK: preheader: +; CHECK-NEXT: [[V_PRE:%.*]] = load i32, i32* [[ARG]], align 4 ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: +; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[V_PRE]], [[PREHEADER]] ], [ [[SUM:%.*]], [[HEADER]] ] ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ] -; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]]) -; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ARG]], align 4 -; CHECK-NEXT: [[SUM:%.*]] = add i32 [[NEW_V]], [[V]] +; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @ro_foo(i32 [[IV]]) +; CHECK-NEXT: [[SUM]] = add i32 [[NEW_V]], [[V]] ; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]] @@ -74,7 +79,8 @@ preheader: header: %iv = phi i32 [0, %preheader], [%iv.next, %header] - %new_v = call i32 @foo(i32 %iv) +; Call prevents to move load over due to it does not guarantee to return. + %new_v = call i32 @ro_foo(i32 %iv) readnone %v = load i32, i32* %arg %sum = add i32 %new_v, %v store i32 %sum, i32* %arg @@ -89,7 +95,7 @@ null_exit: ret i32 0 } -; load cannot be speculated, adress is not null check does not dominate the loop. +; load cannot be speculated, check "address is not null" does not dominate the loop. 
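; (Illustrative note: the extra branch on %c means the null check no longer
; dominates the loop header, so the load must conservatively stay in the loop.)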
define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N, i1 %c) { ; CHECK-LABEL: @loadpre_maybe_null( ; CHECK-NEXT: entry: @@ -101,7 +107,7 @@ define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i3 ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ] -; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]]) +; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @ro_foo(i32 [[IV]]) ; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ARG]], align 4 ; CHECK-NEXT: [[SUM:%.*]] = add i32 [[NEW_V]], [[V]] ; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4 @@ -125,7 +131,8 @@ preheader: header: %iv = phi i32 [0, %preheader], [%iv.next, %header] - %new_v = call i32 @foo(i32 %iv) +; Call prevents to move load over due to it does not guarantee to return. + %new_v = call i32 @ro_foo(i32 %iv) readnone %v = load i32, i32* %arg %sum = add i32 %new_v, %v store i32 %sum, i32* %arg @@ -141,4 +148,4 @@ null_exit: } ; Does not guarantee that returns. -declare i32 @foo(i32) readnone +declare i32 @ro_foo(i32) readnone From 2a078c3072043541ee0595aea6c8d7909f94c6f9 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 8 Sep 2020 10:10:36 -0500 Subject: [PATCH 098/321] [AttributeFuncs] Consider `align` in `typeIncompatible` Alignment attributes need to be dropped for non-pointer values. This also introduces a check into the verifier to ensure you don't use `align` on anything but a pointer. Test needed to be adjusted accordingly. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D87304 --- llvm/lib/IR/Attributes.cpp | 1 + llvm/test/Bitcode/attributes-3.3.ll | 4 ++-- llvm/test/Bitcode/attributes-3.3.ll.bc | Bin 1592 -> 2768 bytes llvm/test/Bitcode/attributes.ll | 4 ++-- llvm/test/Transforms/DeadArgElim/returned.ll | 7 +++++++ .../LoopSimplify/unreachable-loop-pred.ll | 2 +- llvm/test/Verifier/align.ll | 13 +++++++++++++ llvm/test/Verifier/byref.ll | 2 +- 8 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Verifier/align.ll diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index a3940c0fa7fb5..ecb0bd693edd4 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -1848,6 +1848,7 @@ AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) { .addAttribute(Attribute::NoAlias) .addAttribute(Attribute::NoCapture) .addAttribute(Attribute::NonNull) + .addAlignmentAttr(1) // the int here is ignored .addDereferenceableAttr(1) // the int here is ignored .addDereferenceableOrNullAttr(1) // the int here is ignored .addAttribute(Attribute::ReadNone) diff --git a/llvm/test/Bitcode/attributes-3.3.ll b/llvm/test/Bitcode/attributes-3.3.ll index dc7834eaa1611..2f36031f56174 100644 --- a/llvm/test/Bitcode/attributes-3.3.ll +++ b/llvm/test/Bitcode/attributes-3.3.ll @@ -101,8 +101,8 @@ define void @f16() sspreq ret void; } -define void @f17(i8 align 4 %0) -; CHECK: define void @f17(i8 align 4 %0) +define void @f17(i8* align 4 %0) +; CHECK: define void @f17(i8* align 4 %0) { ret void; } diff --git a/llvm/test/Bitcode/attributes-3.3.ll.bc b/llvm/test/Bitcode/attributes-3.3.ll.bc index 5dd71864ea4fb4589a28249bdab79320c4779851..f4d07b83127df56e2ecb5a1d45fd29d02892bfa6 100644 GIT binary patch literal 2768 zcmeH|e@q+a8OI-U7vBNq?)cHv1lu`7hdj2CI~$CNp)tNRG65#DRTI-XVG}q(M2I1j zIBB&V_!7GvnjDr@Y|9_OvMT8xm8+qQRaFO=q%8AT(nTrjA_U?`lZr+xr4?JVWbccE z+rReT{@bIw_n!Cb_s8dX-XC^LlMCf}0J{L7MJGIS`1|k54PT5mXErMJ8l;(Z04LJ` 
zGSkd}KCm0TRDaRAnpv_|%ULUjG^*}vyXbrq`I8ofdoImAl<8JEYo3GF*JW7U5l>*o zTy!ueJjm4yrM1q|Z{_jQp$Pv+jn%E{ODrW(@#WYH=lGdfH0~=rKogsao?CU*7?49>fJ*gF6pXbZo zNrPIHv!_wcatC~FzMejtt`W_aV)B~0ILz_61Fi0ndiu3=jm_L%OwOr`zu;WC1MP0U zfqp$*Q*ORgOx{)(f64iB2Rhv&4fK_CO@;YxF}a{F{)$tkR9;t1w!j{9J@cOO=PSSa z!{&nvkDhw{-g3q7A7=ga-h=UpZ}0od4{RSKuFC9Xr~%*+NzypQ{BxcLK+h9P6D}5# zX`tX8xos{u`}|oRz?H0`lAeraWHSI6DMh3I&ezj4U+KhyRC34xfSxBt`aG*N^_7_v z&|T)25KL+&3{%DdSa$jVf_X!WVDORIHViZG0+>uzQHfxV79jjRDFwltA`lF*qac_! z1U+hF+ED=~H^g%^)DPgp=X2W-^HT;L34jiE z(r{dBJ|5uGjsht5HVPx9;=o8%7^yVX2qFCgBYl~9jKt#b(Z!H7j5H>o6t(XtI7KCA zFj6X9j8uY=5PH%u()av4z}+VTV5HRyjD(gXX&9+Vp9Aphj)JpvbJ~Tf+LVN*dmB&3 zrT7WrgNL7r;Kx>23gp(-7Hj)AgX7j>OrD95ghCdP%qKcDNi^MpiT#f zzF;`Ohw~HJ%zBhr64)+*MTd36%hoO1PO0ck9DF!{JlI{olO5!Q5YDKAU9L z1)^nk)UYyYNNksP{D$jh!%V;Nx?g)cDpk%(mBZWRMBH&+azqH{`S?CG>B9l%Ey5X# z>)z`K2b#h?flzNd74(IB_2J%;aQ@tP+55mg@G>h=X4%Cq@yt4kg0~p&Oqr%@Oz)cw zclu3t77Z&)#*Z~7S>Nc0N&7E^oC)tUPXh6UPX?*raAW;ul*Jh^9W`C|8}6Eo3sc4) zX-pW{Y~1i`pFb0FB;wAPRE-kyaJp(maz;=!K6o_)MH)VdcxdXhh3XmZe_@$p)+d=& zFY~ETH1&RsX=Ul@`6c550`TNJW1;HtP=OkE3`&)Wc-6#IP87{ z3g`d*8nfOv>!!%LwXi2qx&@*?WxR+9t&D4>U2|JSgJ-Tk4cp`;sa}j zYol3rYw%WDGaHjR*j={lV?Oet)Uqo$!R$v~W)pANo9KMu4NoPiE{2>J(1H=wBcUp^ zc+t29>3%+WZRC>uY5(`pf7<^r5BS2&C8xAYu4K;bIqgXK?`wcBBGf0MF>85RtB&CI z-8{fv^tCcvp$ZQOz4w5UwnaE%2Nd!V(ZqQatx5(19)*6D9KPXEGNR;!P^;)(Bj=;F z3Uh(n+Nf2yF;d@nSjo9WI^H_01w@J%T1IBv+qH z9i6=$U9zoA6iS6sk#9r49I=a{-)|MmTHE}N@^YKKtW9pMXz`boi)}~B+m6`ein7km nzGKb3Z6|u0-5tG0j<>co`+IwPI$B;t+P7?_w$jc{9Ls+IHFN#H literal 1592 zcmeH_OKcle6o$`uCNtw?98cl_Tx`-id11sud8j5VOr4n+IVF*tE?C=9RAmvT4u#6) zA;#_`ovIEhQB@VZNeo5afQo2bRj9~LoMMTRRYWUV1y@y}8z4dj#50`hVat*oBhB1T zzyAB5Gxr`%FK??7z$pfxB}>2o0PXk+nXb2nYuqR=4?MHPm{L!ZxTk|VHEGBZhqlzB zneu=#Bzi}i2S&Q1&8O9_>On)=qrTWJ)Wo*Q;&L&3bq6D1!VB;`u0D%EHO{IWlrrAG zc6WE*fP#x_gAKkQ8nbNT50rKx>a)6F$;Gw9I^XGpDR#xyV?OIHH3-XJh5IP%hfTil zDMxY%s#c)%YA9q-;v{< z?2NWNE|#3k)9SrmZriNqWusyesh69ClLWwpH3m9GCz+b(Je}{E__<@QoL1V$tXzv} zj>%7S!4WRnMxqB`M2Wz=k84~H?$OUMVBiYg0#1&IP?paF#Hq9pU;#LUzL8;o0D_?& zKs%iUX+j2QMTlEj)Db{eI54Q)TTpDf9YyU6fhOF8&^PiJz%+s#g#|FG8K46pZXLug zwKuw=&3Fq0Ac|l|(TQ$EVF5d;h+s!y0Xym`gho`OQT>bq3|hI-6|HPX(aMb|KSHCc zdkA)vfFw;|jb{<;u6U%zMnbcwjsEFB6)6xmOYeD5H2l@u{zD2Avh*QyJ*OGMv9xgH z+?@0cxWAKI?;EXylGd?Sd1Q0Nf2gmiVLR@-#)fKoD947brE@Q(gjYr3SoB0HCA^Yi zkETzg2yJPO06dCouwMx#6+LnNg?^=!d{9q(eH4{hub%k!EGn}P>xmyqs2uj|iC=i4 ztTu`BNryIf(}>ldBg*9m#Q9lX`{-sSwzEK#D?L(VJiiv(T76QABz^0#txNl)$YLJh za!87N;H$>AK8Z+?FY*XiUY9nKzFO?orI)3R@%(48TdVWki=J3*KwUg3Z7i}0?2+D} rhhrT7^M88|bmBeSFn>F2pLwvIjyFJp{QDiyvG;Hlt3r6gK&H>%LqLo4 diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index 409528230a7da..d6ea794cf6227 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -98,8 +98,8 @@ define void @f16() sspreq ret void; } -define void @f17(i8 align 4 %0) -; CHECK: define void @f17(i8 align 4 %0) +define void @f17(i8* align 4 %0) +; CHECK: define void @f17(i8* align 4 %0) { ret void; } diff --git a/llvm/test/Transforms/DeadArgElim/returned.ll b/llvm/test/Transforms/DeadArgElim/returned.ll index f9d649498e4b0..0b00a116624bc 100644 --- a/llvm/test/Transforms/DeadArgElim/returned.ll +++ b/llvm/test/Transforms/DeadArgElim/returned.ll @@ -43,6 +43,12 @@ define internal %Ty* @test5(%Ty* %this) { ret %Ty* %this } +; Drop all these attributes +; CHECK-LABEL: define internal void @test6 +define internal align 8 dereferenceable_or_null(2) noalias i8* @test6() { + ret i8* null +} + define %Ty* @caller(%Ty* %this) { %1 = call %Ty* @test1(%Ty* %this) %2 = call %Ty* @test2(%Ty* %this) @@ 
-51,5 +57,6 @@ define %Ty* @caller(%Ty* %this) { ; ...instead, drop 'returned' form the call site ; CHECK: call void @test5(%Ty* %this) %5 = call %Ty* @test5(%Ty* returned %this) + %6 = call i8* @test6() ret %Ty* %this } diff --git a/llvm/test/Transforms/LoopSimplify/unreachable-loop-pred.ll b/llvm/test/Transforms/LoopSimplify/unreachable-loop-pred.ll index 1e92ee4ee3bc6..d116be0e4cd8b 100644 --- a/llvm/test/Transforms/LoopSimplify/unreachable-loop-pred.ll +++ b/llvm/test/Transforms/LoopSimplify/unreachable-loop-pred.ll @@ -22,7 +22,7 @@ foo: ; When loopsimplify generates dedicated exit block for blocks that are landing ; pads (i.e. innerLoopExit in this test), we should not get confused with the ; unreachable pred (unreachableB) to innerLoopExit. -define align 8 void @baz(i32 %trip) personality i32* ()* @wobble { +define void @baz(i32 %trip) personality i32* ()* @wobble { entry: br label %outerHeader diff --git a/llvm/test/Verifier/align.ll b/llvm/test/Verifier/align.ll new file mode 100644 index 0000000000000..762249aa6b11f --- /dev/null +++ b/llvm/test/Verifier/align.ll @@ -0,0 +1,13 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: Wrong types for attribute: inalloca nest noalias nocapture nonnull readnone readonly byref(i32) byval(i32) preallocated(i32) sret(i32) align 1 dereferenceable(1) dereferenceable_or_null(1) +; CHECK-NEXT: @align_non_pointer1 +define void @align_non_pointer1(i32 align 4 %a) { + ret void +} + +; CHECK: Wrong types for attribute: inalloca nest noalias nocapture nonnull readnone readonly signext zeroext byref(void) byval(void) preallocated(void) sret(void) align 1 dereferenceable(1) dereferenceable_or_null(1) +; CHECK-NEXT: @align_non_pointer2 +define align 4 void @align_non_pointer2(i32 %a) { + ret void +} diff --git a/llvm/test/Verifier/byref.ll b/llvm/test/Verifier/byref.ll index 5e7d5873a8db3..d71fb19d7549a 100644 --- a/llvm/test/Verifier/byref.ll +++ b/llvm/test/Verifier/byref.ll @@ -56,7 +56,7 @@ define void @byref_nest(i32* byref(i32) nest) { ret void } -; CHECK: Wrong types for attribute: inalloca nest noalias nocapture nonnull readnone readonly sret byref(i32) byval(i32) preallocated(i32) dereferenceable(1) dereferenceable_or_null(1) +; CHECK: Wrong types for attribute: inalloca nest noalias nocapture nonnull readnone readonly sret byref(i32) byval(i32) preallocated(i32) align 1 dereferenceable(1) dereferenceable_or_null(1) ; CHECK-NEXT: void (i32)* @byref_non_pointer define void @byref_non_pointer(i32 byref(i32)) { ret void From ef48436e62732e61cea5dde9b35b63b3ce8126f7 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 8 Sep 2020 10:13:11 -0500 Subject: [PATCH 099/321] [AttributeFuncs] Consider `noundef` in `typeIncompatible` Drop `noundef` for return values that are replaced by void and make it illegal to put `noundef` on a void value. 
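A minimal IR sketch of the first half of that change (hedged: the function and
attribute mix are illustrative, modeled on the DeadArgElim test updated below):

  ; Before dead-return elimination the pointer result carries noundef:
  define internal noundef noalias i8* @unused_ret() {
    ret i8* null
  }
  ; Once the return value is proven dead, the function returns void, and
  ; typeIncompatible(void) now forces noundef (like align) to be dropped:
  define internal void @unused_ret() {
    ret void
  }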
Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D87306 --- llvm/lib/IR/Attributes.cpp | 4 ++++ llvm/test/Transforms/DeadArgElim/returned.ll | 2 +- llvm/test/Verifier/align.ll | 2 +- llvm/test/Verifier/noundef.ll | 7 +++++++ 4 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Verifier/noundef.ll diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index ecb0bd693edd4..b21d452748424 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -1859,6 +1859,10 @@ AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) { .addByValAttr(Ty) .addByRefAttr(Ty); + // Some attributes can apply to all "values" but there are no `void` values. + if (Ty->isVoidTy()) + Incompatible.addAttribute(Attribute::NoUndef); + return Incompatible; } diff --git a/llvm/test/Transforms/DeadArgElim/returned.ll b/llvm/test/Transforms/DeadArgElim/returned.ll index 0b00a116624bc..e96faea6e9f61 100644 --- a/llvm/test/Transforms/DeadArgElim/returned.ll +++ b/llvm/test/Transforms/DeadArgElim/returned.ll @@ -45,7 +45,7 @@ define internal %Ty* @test5(%Ty* %this) { ; Drop all these attributes ; CHECK-LABEL: define internal void @test6 -define internal align 8 dereferenceable_or_null(2) noalias i8* @test6() { +define internal align 8 dereferenceable_or_null(2) noundef noalias i8* @test6() { ret i8* null } diff --git a/llvm/test/Verifier/align.ll b/llvm/test/Verifier/align.ll index 762249aa6b11f..38ce3772e7652 100644 --- a/llvm/test/Verifier/align.ll +++ b/llvm/test/Verifier/align.ll @@ -6,7 +6,7 @@ define void @align_non_pointer1(i32 align 4 %a) { ret void } -; CHECK: Wrong types for attribute: inalloca nest noalias nocapture nonnull readnone readonly signext zeroext byref(void) byval(void) preallocated(void) sret(void) align 1 dereferenceable(1) dereferenceable_or_null(1) +; CHECK: Wrong types for attribute: inalloca nest noalias nocapture noundef nonnull readnone readonly signext zeroext byref(void) byval(void) preallocated(void) sret(void) align 1 dereferenceable(1) dereferenceable_or_null(1) ; CHECK-NEXT: @align_non_pointer2 define align 4 void @align_non_pointer2(i32 %a) { ret void diff --git a/llvm/test/Verifier/noundef.ll b/llvm/test/Verifier/noundef.ll new file mode 100644 index 0000000000000..7b199cd6d2dee --- /dev/null +++ b/llvm/test/Verifier/noundef.ll @@ -0,0 +1,7 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: Wrong types for attribute: inalloca nest noalias nocapture noundef nonnull readnone readonly signext zeroext byref(void) byval(void) preallocated(void) sret(void) align 1 dereferenceable(1) dereferenceable_or_null(1) +; CHECK-NEXT: @noundef_void +define noundef void @noundef_void() { + ret void +} From 01baeda7ca6645f5d8455733b110c89203a61ccf Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 5 Oct 2020 21:28:07 -0700 Subject: [PATCH 100/321] [JITLink][ELF] Handle BSS sections, improve some error messages. This patch enables basic BSS section handling, and improves a couple of error messages in the ELF section parsing code. Patch by Christian Schafmeister. Thanks Christian! 
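For context, a hedged sketch of what exercises the new path: any
zero-initialized global lands in a BSS-style (SHT_NOBITS) section, which has a
size and address but no bytes in the object file, so JITLink must materialize
it as a zero-fill block instead of copying section content:

  @buffer = internal global [4096 x i8] zeroinitializer ; emitted into .bss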
Differential Revision: https://reviews.llvm.org/D88867 --- llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp | 9 +++++++-- .../JITLink/X86/ELF_x86-64_relocations.s | 13 ++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp index 20295434d2e5a..40c7d04378e5c 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp @@ -357,6 +357,9 @@ class ELFLinkGraphBuilder_x86_64 { if (SecRef.sh_type == ELF::SHT_SYMTAB) // TODO: Dynamic? SymTab = SecRef; + } else { + auto &Section = G->createSection(*Name, Prot); + G->createZeroFillBlock(Section, Size, Address, Alignment, 0); } } @@ -480,7 +483,8 @@ class ELFLinkGraphBuilder_x86_64 { return Name.takeError(); auto Section = G->findSectionByName(*Name); if (!Section) - return make_error("Could not find a section", + return make_error("Could not find a section " + + *Name, llvm::inconvertibleErrorCode()); // we only have one for now auto blocks = Section->blocks(); @@ -527,7 +531,8 @@ class ELFLinkGraphBuilder_x86_64 { auto JitSection = G->findSectionByName(*sectName); if (!JitSection) return make_error( - "Could not find a section", llvm::inconvertibleErrorCode()); + "Could not find the JitSection " + *sectName, + llvm::inconvertibleErrorCode()); auto bs = JitSection->blocks(); if (bs.empty()) return make_error( diff --git a/llvm/test/ExecutionEngine/JITLink/X86/ELF_x86-64_relocations.s b/llvm/test/ExecutionEngine/JITLink/X86/ELF_x86-64_relocations.s index ca9c926b32de1..0eef111102640 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/ELF_x86-64_relocations.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/ELF_x86-64_relocations.s @@ -46,6 +46,17 @@ named_data: .long 42 .size named_data, 4 +# Test BSS / zero-fill section handling. 
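+# (A BSS symbol reserves zero-initialized memory at run time but contributes
+# no bytes to the object file, hence the dedicated zero-fill handling.)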
+# llvm-jitlink: *{4}bss_variable = 0 + + .type bss_variable,@object + .bss + .globl bss_variable + .p2align 2 +bss_variable: + .long 0 + .size bss_variable, 4 + .ident "clang version 10.0.0-4ubuntu1 " .section ".note.GNU-stack","",@progbits - .addrsig \ No newline at end of file + .addrsig From 848342b333e76ff1db04d10097470fc454d4a8b7 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 5 Oct 2020 23:35:49 -0500 Subject: [PATCH 101/321] [AttributeFuncs][FIX] Update new tests (D87304) after sret changes --- llvm/test/Verifier/align.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Verifier/align.ll b/llvm/test/Verifier/align.ll index 38ce3772e7652..dd885a8623b86 100644 --- a/llvm/test/Verifier/align.ll +++ b/llvm/test/Verifier/align.ll @@ -1,12 +1,12 @@ ; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s -; CHECK: Wrong types for attribute: inalloca nest noalias nocapture nonnull readnone readonly byref(i32) byval(i32) preallocated(i32) sret(i32) align 1 dereferenceable(1) dereferenceable_or_null(1) +; CHECK: Wrong types for attribute: inalloca nest noalias nocapture nonnull readnone readonly sret byref(i32) byval(i32) preallocated(i32) align 1 dereferenceable(1) dereferenceable_or_null(1) ; CHECK-NEXT: @align_non_pointer1 define void @align_non_pointer1(i32 align 4 %a) { ret void } -; CHECK: Wrong types for attribute: inalloca nest noalias nocapture noundef nonnull readnone readonly signext zeroext byref(void) byval(void) preallocated(void) sret(void) align 1 dereferenceable(1) dereferenceable_or_null(1) +; CHECK: Wrong types for attribute: inalloca nest noalias nocapture noundef nonnull readnone readonly signext zeroext sret byref(void) byval(void) preallocated(void) align 1 dereferenceable(1) dereferenceable_or_null(1) ; CHECK-NEXT: @align_non_pointer2 define align 4 void @align_non_pointer2(i32 %a) { ret void From bbb0ee6e34db1d8e00367ea03ee1972d1131d1e0 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 6 Oct 2020 11:18:13 +0700 Subject: [PATCH 102/321] Revert "[SCEV] Prove implicaitons via AddRec start" This reverts commit 69acdfe075fa8eb18781f88f4d0cd1ea40fa6e48. Need to investigate reported miscompiles. --- llvm/include/llvm/Analysis/ScalarEvolution.h | 31 +----- llvm/lib/Analysis/ScalarEvolution.cpp | 103 +++++------------- .../Analysis/ScalarEvolutionTest.cpp | 65 ----------- 3 files changed, 32 insertions(+), 167 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 158257a5aa9a1..febca473776aa 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1677,30 +1677,23 @@ class ScalarEvolution { getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) const; /// Test whether the condition described by Pred, LHS, and RHS is true - /// whenever the given FoundCondValue value evaluates to true in given - /// Context. If Context is nullptr, then the found predicate is true - /// everywhere. + /// whenever the given FoundCondValue value evaluates to true. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const Value *FoundCondValue, bool Inverse, - const Instruction *Context = nullptr); + const Value *FoundCondValue, bool Inverse); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is - /// true in given Context. If Context is nullptr, then the found predicate is - /// true everywhere. 
+ /// true. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, - const SCEV *FoundRHS, - const Instruction *Context = nullptr); + const SCEV *FoundRHS); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is - /// true in given Context. If Context is nullptr, then the found predicate is - /// true everywhere. + /// true. bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, - const SCEV *FoundRHS, - const Instruction *Context = nullptr); + const SCEV *FoundRHS); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is @@ -1747,18 +1740,6 @@ class ScalarEvolution { const SCEV *FoundLHS, const SCEV *FoundRHS); - /// Test whether the condition described by Pred, LHS, and RHS is true - /// whenever the condition described by Pred, FoundLHS, and FoundRHS is - /// true. - /// - /// This routine tries to weaken the known condition basing on fact that - /// FoundLHS is an AddRec. - bool isImpliedCondOperandsViaAddRecStart(ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS, - const SCEV *FoundLHS, - const SCEV *FoundRHS, - const Instruction *Context); - /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 70d37cb73fd16..f3764966f3017 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9549,16 +9549,15 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, // Try to prove (Pred, LHS, RHS) using isImpliedCond. 
auto ProveViaCond = [&](const Value *Condition, bool Inverse) { - const Instruction *Context = &BB->front(); - if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context)) + if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse)) return true; if (ProvingStrictComparison) { if (!ProvedNonStrictComparison) - ProvedNonStrictComparison = isImpliedCond(NonStrictPredicate, LHS, RHS, - Condition, Inverse, Context); + ProvedNonStrictComparison = + isImpliedCond(NonStrictPredicate, LHS, RHS, Condition, Inverse); if (!ProvedNonEquality) - ProvedNonEquality = isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, - Condition, Inverse, Context); + ProvedNonEquality = + isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, Condition, Inverse); if (ProvedNonStrictComparison && ProvedNonEquality) return true; } @@ -9624,8 +9623,7 @@ bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const Value *FoundCondValue, bool Inverse, - const Instruction *Context) { + const Value *FoundCondValue, bool Inverse) { if (!PendingLoopPredicates.insert(FoundCondValue).second) return false; @@ -9636,16 +9634,12 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (const BinaryOperator *BO = dyn_cast(FoundCondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) - return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, - Context) || - isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, - Context); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); } else if (BO->getOpcode() == Instruction::Or) { if (Inverse) - return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, - Context) || - isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, - Context); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); } } @@ -9663,14 +9657,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); - return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context); + return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS); } bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, - const SCEV *FoundLHS, const SCEV *FoundRHS, - const Instruction *Context) { + const SCEV *FoundLHS, + const SCEV *FoundRHS) { // Balance the types. if (getTypeSizeInBits(LHS->getType()) < getTypeSizeInBits(FoundLHS->getType())) { @@ -9714,16 +9708,16 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // Check whether the found predicate is the same as the desired predicate. if (FoundPred == Pred) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); // Check whether swapping the found predicate makes it the same as the // desired predicate. 
if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { if (isa(RHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context); + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); else - return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), RHS, - LHS, FoundLHS, FoundRHS, Context); + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), + RHS, LHS, FoundLHS, FoundRHS); } // Unsigned comparison is the same as signed comparison when both the operands @@ -9731,7 +9725,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (CmpInst::isUnsigned(FoundPred) && CmpInst::getSignedPredicate(FoundPred) == Pred && isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); // Check if we can make progress by sharpening ranges. if (FoundPred == ICmpInst::ICMP_NE && @@ -9768,8 +9762,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, case ICmpInst::ICMP_UGE: // We know V `Pred` SharperMin. If this implies LHS `Pred` // RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin), - Context)) + if (isImpliedCondOperands(Pred, LHS, RHS, V, + getConstant(SharperMin))) return true; LLVM_FALLTHROUGH; @@ -9784,8 +9778,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // // If V `Pred` Min implies LHS `Pred` RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), - Context)) + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min))) return true; break; @@ -9793,14 +9786,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_ULE: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(SharperMin), Context)) + LHS, V, getConstant(SharperMin))) return true; LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(Min), Context)) + LHS, V, getConstant(Min))) return true; break; @@ -9814,12 +9807,11 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // Check whether the actual condition is beyond sufficient. if (FoundPred == ICmpInst::ICMP_EQ) if (ICmpInst::isTrueWhenEqual(Pred)) - if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; if (Pred == ICmpInst::ICMP_NE) if (!ICmpInst::isTrueWhenEqual(FoundPred)) - if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, - Context)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) return true; // Otherwise assume the worst. @@ -9898,44 +9890,6 @@ Optional ScalarEvolution::computeConstantDifference(const SCEV *More, return None; } -bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart( - ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) { - // Try to recognize the following pattern: - // - // FoundRHS = ... - // ... 
- // loop: - // FoundLHS = {Start,+,W} - // context_bb: // Basic block from the same loop - // known(Pred, FoundLHS, FoundRHS) - // - // If some predicate is known in the context of a loop, it is also known on - // each iteration of this loop, including the first iteration. Therefore, in - // this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to - // prove the original pred using this fact. - if (!Context) - return false; - // Make sure AR varies in the context block. - if (auto *AR = dyn_cast(FoundLHS)) { - if (!AR->getLoop()->contains(Context->getParent())) - return false; - if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop())) - return false; - return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS); - } - - if (auto *AR = dyn_cast(FoundRHS)) { - if (!AR->getLoop()->contains(Context)) - return false; - if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop())) - return false; - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart()); - } - - return false; -} - bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS) { @@ -10126,18 +10080,13 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, - const SCEV *FoundRHS, - const Instruction *Context) { + const SCEV *FoundRHS) { if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; - if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS, - Context)) - return true; - return isImpliedCondOperandsHelper(Pred, LHS, RHS, FoundLHS, FoundRHS) || // ~x < ~y --> x > y diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index be8941838f71a..ff33495f22711 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1251,69 +1251,4 @@ TEST_F(ScalarEvolutionsTest, SCEVgetExitLimitForGuardedLoop) { }); } -TEST_F(ScalarEvolutionsTest, ImpliedViaAddRecStart) { - LLVMContext C; - SMDiagnostic Err; - std::unique_ptr M = parseAssemblyString( - "define void @foo(i32* %p) { " - "entry: " - " %x = load i32, i32* %p, !range !0 " - " br label %loop " - "loop: " - " %iv = phi i32 [ %x, %entry], [%iv.next, %backedge] " - " %ne.check = icmp ne i32 %iv, 0 " - " br i1 %ne.check, label %backedge, label %exit " - "backedge: " - " %iv.next = add i32 %iv, -1 " - " br label %loop " - "exit:" - " ret void " - "} " - "!0 = !{i32 0, i32 2147483647}", - Err, C); - - ASSERT_TRUE(M && "Could not parse module?"); - ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); - - runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { - auto *X = SE.getSCEV(getInstructionByName(F, "x")); - auto *Context = getInstructionByName(F, "iv.next"); - EXPECT_TRUE(SE.isKnownPredicateAt(ICmpInst::ICMP_NE, X, - SE.getZero(X->getType()), Context)); - }); -} - -TEST_F(ScalarEvolutionsTest, UnsignedIsImpliedViaOperations) { - LLVMContext C; - SMDiagnostic Err; - std::unique_ptr M = - parseAssemblyString("define void @foo(i32* %p1, i32* %p2) { " - "entry: " - " %x = load i32, i32* %p1, !range !0 " - " %cond = icmp ne i32 %x, 0 " - " br i1 %cond, label %guarded, label %exit " - "guarded: " - " %y = add i32 %x, -1 " - " ret void " - "exit: 
" - " ret void " - "} " - "!0 = !{i32 0, i32 2147483647}", - Err, C); - - ASSERT_TRUE(M && "Could not parse module?"); - ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); - - runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { - auto *X = SE.getSCEV(getInstructionByName(F, "x")); - auto *Y = SE.getSCEV(getInstructionByName(F, "y")); - auto *Guarded = getInstructionByName(F, "y")->getParent(); - ASSERT_TRUE(Guarded); - EXPECT_TRUE( - SE.isBasicBlockEntryGuardedByCond(Guarded, ICmpInst::ICMP_ULT, Y, X)); - EXPECT_TRUE( - SE.isBasicBlockEntryGuardedByCond(Guarded, ICmpInst::ICMP_UGT, X, Y)); - }); -} - } // end namespace llvm From ee599bf2a9fe6d79777dbe1912daf2a34cbc14e1 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 5 Oct 2020 23:53:41 -0500 Subject: [PATCH 103/321] [AttributeFuncs][FIX] Update new tests (D87304, D87306) after sret changes Hopefully the last of these, apologies for the noise. --- llvm/test/Verifier/align.ll | 2 +- llvm/test/Verifier/noundef.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Verifier/align.ll b/llvm/test/Verifier/align.ll index dd885a8623b86..872cc27ff891d 100644 --- a/llvm/test/Verifier/align.ll +++ b/llvm/test/Verifier/align.ll @@ -6,7 +6,7 @@ define void @align_non_pointer1(i32 align 4 %a) { ret void } -; CHECK: Wrong types for attribute: inalloca nest noalias nocapture noundef nonnull readnone readonly signext zeroext sret byref(void) byval(void) preallocated(void) align 1 dereferenceable(1) dereferenceable_or_null(1) +; CHECK: Wrong types for attribute: inalloca nest noalias nocapture noundef nonnull readnone readonly signext sret zeroext byref(void) byval(void) preallocated(void) align 1 dereferenceable(1) dereferenceable_or_null(1) ; CHECK-NEXT: @align_non_pointer2 define align 4 void @align_non_pointer2(i32 %a) { ret void diff --git a/llvm/test/Verifier/noundef.ll b/llvm/test/Verifier/noundef.ll index 7b199cd6d2dee..4ac5aaa3ad27e 100644 --- a/llvm/test/Verifier/noundef.ll +++ b/llvm/test/Verifier/noundef.ll @@ -1,6 +1,6 @@ ; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s -; CHECK: Wrong types for attribute: inalloca nest noalias nocapture noundef nonnull readnone readonly signext zeroext byref(void) byval(void) preallocated(void) sret(void) align 1 dereferenceable(1) dereferenceable_or_null(1) +; CHECK: Wrong types for attribute: inalloca nest noalias nocapture noundef nonnull readnone readonly signext sret zeroext byref(void) byval(void) preallocated(void) align 1 dereferenceable(1) dereferenceable_or_null(1) ; CHECK-NEXT: @noundef_void define noundef void @noundef_void() { ret void From 70d9dc867417ac63fe280ab145776f75a9487f0f Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 6 Oct 2020 08:51:49 +0200 Subject: [PATCH 104/321] [AST][RecoveryExpr] Support dependent binary operator in C for error recovery. 
see the whole context in: https://reviews.llvm.org/D85025 Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D84226 --- clang/include/clang/AST/ASTContext.h | 8 +++++ clang/lib/Sema/SemaExpr.cpp | 44 +++++++++++++++++++++++++++- clang/test/AST/ast-dump-recovery.c | 34 +++++++++++++++++---- clang/test/Sema/error-dependence.c | 9 ++++++ 4 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 clang/test/Sema/error-dependence.c diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index d30cf045f1040..e261e29036e97 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -665,6 +665,14 @@ class ASTContext : public RefCountedBase { const LangOptions& getLangOpts() const { return LangOpts; } + // If this condition is false, typo correction must be performed eagerly + // rather than delayed in many places, as it makes use of dependent types. + // the condition is false for clang's C-only codepath, as it doesn't support + // dependent types yet. + bool isDependenceAllowed() const { + return LangOpts.CPlusPlus || LangOpts.RecoveryAST; + } + const SanitizerBlacklist &getSanitizerBlacklist() const { return *SanitizerBL; } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e51b276261849..ee41d5f5b37de 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -24,6 +24,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" +#include "clang/AST/OperationKinds.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/Builtins.h" @@ -13683,7 +13684,7 @@ static std::pair CorrectDelayedTyposInBinOp(Sema &S, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { ExprResult LHS = LHSExpr, RHS = RHSExpr; - if (!S.getLangOpts().CPlusPlus) { + if (!S.Context.isDependenceAllowed()) { // C cannot handle TypoExpr nodes on either side of a binop because it // doesn't handle dependent types properly, so make sure any TypoExprs have // been dealt with before checking the operands. @@ -14364,6 +14365,47 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc, return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); } + if (getLangOpts().RecoveryAST && + (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent())) { + assert(!getLangOpts().CPlusPlus); + assert((LHSExpr->containsErrors() || RHSExpr->containsErrors()) && + "Should only occur in error-recovery path."); + if (BinaryOperator::isCompoundAssignmentOp(Opc)) + // C [6.15.16] p3: + // An assignment expression has the value of the left operand after the + // assignment, but is not an lvalue. + return CompoundAssignOperator::Create( + Context, LHSExpr, RHSExpr, Opc, + LHSExpr->getType().getUnqualifiedType(), VK_RValue, OK_Ordinary, + OpLoc, CurFPFeatureOverrides()); + QualType ResultType; + switch (Opc) { + case BO_Assign: + ResultType = LHSExpr->getType().getUnqualifiedType(); + break; + case BO_LT: + case BO_GT: + case BO_LE: + case BO_GE: + case BO_EQ: + case BO_NE: + case BO_LAnd: + case BO_LOr: + // These operators have a fixed result type regardless of operands. + ResultType = Context.IntTy; + break; + case BO_Comma: + ResultType = RHSExpr->getType(); + break; + default: + ResultType = Context.DependentTy; + break; + } + return BinaryOperator::Create(Context, LHSExpr, RHSExpr, Opc, ResultType, + VK_RValue, OK_Ordinary, OpLoc, + CurFPFeatureOverrides()); + } + // Build a built-in binary operation. 
return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr); } diff --git a/clang/test/AST/ast-dump-recovery.c b/clang/test/AST/ast-dump-recovery.c index f3a33fdac49b9..66830e072a2ac 100644 --- a/clang/test/AST/ast-dump-recovery.c +++ b/clang/test/AST/ast-dump-recovery.c @@ -42,11 +42,33 @@ void test1() { void test2() { int* ptr; - // FIXME: the top-level expr should be a binary operator. - // CHECK: ImplicitCastExpr {{.*}} contains-errors - // CHECK-NEXT: `-RecoveryExpr {{.*}} contains-errors lvalue - // CHECK-NEXT: |-DeclRefExpr {{.*}} 'ptr' 'int *' - // CHECK-NEXT: `-RecoveryExpr {{.*}} - // CHECK-NEXT: `-DeclRefExpr {{.*}} 'some_func' + // CHECK: BinaryOperator {{.*}} 'int *' contains-errors '=' + // CHECK-NEXT: |-DeclRefExpr {{.*}} 'ptr' 'int *' + // CHECK-NEXT: `-RecoveryExpr {{.*}} + // CHECK-NEXT: `-DeclRefExpr {{.*}} 'some_func' ptr = some_func(); // should not crash + + int compoundOp; + // CHECK: CompoundAssignOperator {{.*}} 'int' contains-errors '+=' + // CHECK-NEXT: |-DeclRefExpr {{.*}} 'compoundOp' + // CHECK-NEXT: `-RecoveryExpr {{.*}} contains-errors + // CHECK-NEXT: `-DeclRefExpr {{.*}} 'some_func' + compoundOp += some_func(); + + // CHECK: BinaryOperator {{.*}} 'int' contains-errors '||' + // CHECK-NEXT: |-RecoveryExpr {{.*}} + // CHECK-NEXT: | `-DeclRefExpr {{.*}} 'some_func' + // CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1 + some_func() || 1; + + // CHECK: BinaryOperator {{.*}} '' contains-errors ',' + // CHECK-NEXT: |-IntegerLiteral {{.*}} 'int' 1 + // CHECK-NEXT: `-RecoveryExpr {{.*}} + // CHECK-NEXT: `-DeclRefExpr {{.*}} 'some_func' + 1, some_func(); + // CHECK: BinaryOperator {{.*}} 'int' contains-errors ',' + // CHECK-NEXT: |-RecoveryExpr {{.*}} '' + // CHECK-NEXT: | `-DeclRefExpr {{.*}} 'some_func' + // CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1 + some_func(), 1; } diff --git a/clang/test/Sema/error-dependence.c b/clang/test/Sema/error-dependence.c new file mode 100644 index 0000000000000..a98b021094de3 --- /dev/null +++ b/clang/test/Sema/error-dependence.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -frecovery-ast -fno-recovery-ast-type %s + +int call(int); // expected-note {{'call' declared here}} + +void test1(int s) { + // verify "assigning to 'int' from incompatible type ''" is + // not emitted. + s = call(); // expected-error {{too few arguments to function call}} +} From 4ed47d50ea819fc3cf7341e86e947fef920743df Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Thu, 3 Sep 2020 08:28:57 +0100 Subject: [PATCH 105/321] [SVE][CodeGen] Fix DAGCombiner::ForwardStoreValueToDirectLoad for scalable vectors In DAGCombiner::ForwardStoreValueToDirectLoad I have fixed up some implicit casts from TypeSize -> uint64_t and replaced calls to getVectorNumElements() with getVectorElementCount(). There are some simple cases of forwarding that we can definitely support for scalable vectors, i.e. when the store and load are both scalable vectors and have the same size. 
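As an illustrative sketch of that directly-forwardable case (exact types are
assumed; the committed coverage is in the test referenced below):

  define <vscale x 2 x i64> @fwd(<vscale x 2 x i64>* %p, <vscale x 2 x i64> %v) {
    store <vscale x 2 x i64> %v, <vscale x 2 x i64>* %p
    ; Same scalable type, same base, offset 0: the load can be replaced by %v.
    %l = load <vscale x 2 x i64>, <vscale x 2 x i64>* %p
    ret <vscale x 2 x i64> %l
  }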
I have added tests for the new code paths here: CodeGen/AArch64/sve-forward-st-to-ld.ll Differential Revision: https://reviews.llvm.org/D87098 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 46 +++++++-- .../CodeGen/AArch64/sve-forward-st-to-ld.ll | 99 +++++++++++++++++++ 2 files changed, 135 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f67a06f81c559..c36a085599bfb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14792,8 +14792,8 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); } -static inline int numVectorEltsOrZero(EVT T) { - return T.isVector() ? T.getVectorNumElements() : 0; +static inline ElementCount numVectorEltsOrZero(EVT T) { + return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0); } bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { @@ -14861,6 +14861,24 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { EVT STMemType = ST->getMemoryVT(); EVT STType = ST->getValue().getValueType(); + // There are two cases to consider here: + // 1. The store is fixed width and the load is scalable. In this case we + // don't know at compile time if the store completely envelops the load + // so we abandon the optimisation. + // 2. The store is scalable and the load is fixed width. We could + // potentially support a limited number of cases here, but there has been + // no cost-benefit analysis to prove it's worth it. + bool LdStScalable = LDMemType.isScalableVector(); + if (LdStScalable != STMemType.isScalableVector()) + return SDValue(); + + // If we are dealing with scalable vectors on a big endian platform the + // calculation of offsets below becomes trickier, since we do not know at + // compile time the absolute size of the vector. Until we've done more + // analysis on big-endian platforms it seems better to bail out for now. + if (LdStScalable && DAG.getDataLayout().isBigEndian()) + return SDValue(); + BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG); BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG); int64_t Offset; @@ -14872,13 +14890,21 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // the stored value). With Offset=n (for n > 0) the loaded value starts at the // n:th least significant byte of the stored value. if (DAG.getDataLayout().isBigEndian()) - Offset = ((int64_t)STMemType.getStoreSizeInBits() - - (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset; + Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() - + (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) / + 8 - + Offset; // Check that the stored value cover all bits that are loaded. 
- bool STCoversLD = - (Offset >= 0) && - (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits()); + bool STCoversLD; + + TypeSize LdMemSize = LDMemType.getSizeInBits(); + TypeSize StMemSize = STMemType.getSizeInBits(); + if (LdStScalable) + STCoversLD = (Offset == 0) && LdMemSize == StMemSize; + else + STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <= + StMemSize.getFixedSize()); auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue { if (LD->isIndexed()) { @@ -14899,15 +14925,15 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // Memory as copy space (potentially masked). if (Offset == 0 && LDType == STType && STMemType == LDMemType) { // Simple case: Direct non-truncating forwarding - if (LDType.getSizeInBits() == LDMemType.getSizeInBits()) + if (LDType.getSizeInBits() == LdMemSize) return ReplaceLd(LD, ST->getValue(), Chain); // Can we model the truncate and extension with an and mask? if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() && !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) { // Mask to size of LDMemType auto Mask = - DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(), - STMemType.getSizeInBits()), + DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(), + StMemSize.getFixedSize()), SDLoc(ST), STType); auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask); return ReplaceLd(LD, Val, Chain); diff --git a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll new file mode 100644 index 0000000000000..b38f7da73eac3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll @@ -0,0 +1,99 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning + +define @sti64ldi64(* nocapture %P, %v) { +; CHECK-LABEL: sti64ldi64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl] +; CHECK-NEXT: ret +entry: + %arrayidx0 = getelementptr inbounds , * %P, i64 1 + store %v, * %arrayidx0 + %arrayidx1 = getelementptr inbounds , * %P, i64 1 + %0 = load , * %arrayidx1 + ret %0 +} + +define @stf64ldf64(* nocapture %P, %v) { +; CHECK-LABEL: stf64ldf64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl] +; CHECK-NEXT: ret +entry: + %arrayidx0 = getelementptr inbounds , * %P, i64 1 + store %v, * %arrayidx0 + %arrayidx1 = getelementptr inbounds , * %P, i64 1 + %0 = load , * %arrayidx1 + ret %0 +} + +define @sti32ldi32ext(* nocapture %P, %v) { +; CHECK-LABEL: sti32ldi32ext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtw z1.d, p0/m, z0.d +; CHECK-NEXT: st1w { z0.d }, p0, [x0] +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret +entry: + %0 = trunc %v to + store %0, * %P + %1 = load , * %P + %2 = sext %1 to + ret %2 +} + +define <2 x i64> @sti64ldfixedi64(* nocapture %P, %v) { +; CHECK-LABEL: sti64ldfixedi64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl] +; CHECK-NEXT: ldr q0, [x0, x8] +; CHECK-NEXT: ret +entry: + %arrayidx0 = getelementptr inbounds , * %P, i64 1 + store %v, * %arrayidx0 + %arrayidx1 = bitcast * %arrayidx0 to <2 x i64>* + %0 = load <2 x i64>, <2 x i64>* %arrayidx1 + ret <2 x i64> %0 +} + +define @sti64ldi32(* nocapture %P, %v) { +; CHECK-LABEL: sti64ldi32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret +entry: + %0 = bitcast * %P to * + %arrayidx0 = getelementptr inbounds , * %P, i64 1 + store %v, * %arrayidx0 + %arrayidx1 = getelementptr inbounds , * %0, i64 1 + %1 = load , * %arrayidx1 + ret %1 +} + +define @stf64ldi64(* nocapture %P, %v) { +; CHECK-LABEL: stf64ldi64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret +entry: + %0 = bitcast * %P to * + %arrayidx0 = getelementptr inbounds , * %P, i64 1 + store %v, * %arrayidx0 + %arrayidx1 = getelementptr inbounds , * %0, i64 1 + %1 = load , * %arrayidx1 + ret %1 +} From ce4d15fe142eb0e0ed7c998a73b13e7bee45d99e Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 6 Oct 2020 09:27:24 +0200 Subject: [PATCH 106/321] [lldb] Symlink the Clang resource directory to the LLDB build directory in standalone builds When doing a standalone build (i.e., building just LLDB against an existing LLVM/Clang installation), LLDB is currently unable to find any Clang resource directory that contains all the builtin headers we need to parse real source code. This causes several tests that actually parse source code on disk within the expression parser to fail (most notably nearly all the import-std-module tests). The reason why LLDB can't find the resource directory is that we search based on the path of the LLDB shared library path. We assumed that the Clang resource directory is in the same prefix and has the same relative path to the LLDB shared library (e.g., `../clang/10.0.0/include`). 
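Concretely (paths hypothetical), that search assumes a shared layout such as:

  <prefix>/lib/liblldb.so
  <prefix>/lib/clang/10.0.0/include/   # resource headers, found relative to liblldb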
However, in a standalone build the existing Clang can be anywhere on the disk,
so we can't just rely on hardcoded relative paths to the LLDB shared library.

It seems we can solve this either by copying the resource directory to the
LLDB installation, by symlinking it there, or by passing the path to the Clang
installation to the code that is trying to find the resource directory. When
building the LLDB framework we currently copy the resource directory over to
the framework folder (this is why the import-std-module tests are not failing
on the Green Dragon standalone bot).

This patch symlinks the resource directory of Clang into the LLDB build
directory. The reason for that is simply that this is only needed when running
LLDB from the build directory. Once LLDB and Clang/LLVM are installed, the
already existing logic can find the Clang resource directory by searching
relative to the LLDB shared library.

Reviewed By: kastiglione, JDevlieghere

Differential Revision: https://reviews.llvm.org/D88581
---
 lldb/cmake/modules/LLDBConfig.cmake    | 24 ++++++++++++++++++++++++
 lldb/cmake/modules/LLDBFramework.cmake | 15 +--------------
 lldb/source/API/CMakeLists.txt         | 21 ++++++++++++++++++++-
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index af94e6e223d9a..5fbc89892c73c 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -250,6 +250,30 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
   endif()
 endif()
 
+
+# If LLDB is building against a prebuilt Clang, then the Clang resource
+# directory that LLDB is using for its embedded Clang instance needs to point
+# to the resource directory of the used Clang installation.
+if (NOT TARGET clang-resource-headers)
+  set(LLDB_CLANG_RESOURCE_DIR_NAME "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
+  # Iterate over the possible places where the external resource directory
+  # could be and pick the first that exists.
+  foreach(CANDIDATE "${Clang_DIR}/../.." "${LLVM_DIR}" "${LLVM_LIBRARY_DIRS}"
+                    "${LLVM_BUILD_LIBRARY_DIR}"
+                    "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}")
+    # Build the resource directory path by appending 'clang/<version>'.
+    set(CANDIDATE_RESOURCE_DIR "${CANDIDATE}/clang/${LLDB_CLANG_RESOURCE_DIR_NAME}")
+    if (IS_DIRECTORY "${CANDIDATE_RESOURCE_DIR}")
+      set(LLDB_EXTERNAL_CLANG_RESOURCE_DIR "${CANDIDATE_RESOURCE_DIR}")
+      break()
+    endif()
+  endforeach()
+
+  if (NOT LLDB_EXTERNAL_CLANG_RESOURCE_DIR)
+    message(FATAL_ERROR "Expected directory for clang-resource headers not found: ${LLDB_EXTERNAL_CLANG_RESOURCE_DIR}")
+  endif()
+endif()
+
 # Find Apple-specific libraries or frameworks that may be needed.
 if (APPLE)
   if(NOT APPLE_EMBEDDED)
diff --git a/lldb/cmake/modules/LLDBFramework.cmake b/lldb/cmake/modules/LLDBFramework.cmake
index 43af71b78f246..2fb059ac458b8 100644
--- a/lldb/cmake/modules/LLDBFramework.cmake
+++ b/lldb/cmake/modules/LLDBFramework.cmake
@@ -110,20 +110,7 @@ if(NOT APPLE_EMBEDDED)
     add_dependencies(liblldb clang-resource-headers)
     set(clang_resource_headers_dir $<TARGET_PROPERTY:clang-resource-headers,RUNTIME_OUTPUT_DIRECTORY>)
   else()
-    # In standalone builds try the best possible guess
-    if(Clang_DIR)
-      set(clang_lib_dir ${Clang_DIR}/../..)
-    elseif(LLVM_DIR)
-      set(clang_lib_dir ${LLVM_DIR}/../..)
-    elseif(LLVM_LIBRARY_DIRS)
-      set(clang_lib_dir ${LLVM_LIBRARY_DIRS})
-    elseif(LLVM_BUILD_LIBRARY_DIR)
-      set(clang_lib_dir ${LLVM_BUILD_LIBRARY_DIR})
-    elseif(LLVM_BINARY_DIR)
-      set(clang_lib_dir ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
-    endif()
-    set(clang_version ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH})
-    set(clang_resource_headers_dir ${clang_lib_dir}/clang/${clang_version}/include)
+    set(clang_resource_headers_dir ${LLDB_EXTERNAL_CLANG_RESOURCE_DIR}/include)
     if(NOT EXISTS ${clang_resource_headers_dir})
       message(WARNING "Expected directory for clang-resource headers not found: ${clang_resource_headers_dir}")
     endif()
diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt
index aeb1f15e294b2..a7d6592b31dc0 100644
--- a/lldb/source/API/CMakeLists.txt
+++ b/lldb/source/API/CMakeLists.txt
@@ -195,8 +195,27 @@ else()
   )
 endif()
 
-if (NOT LLDB_BUILT_STANDALONE)
+# The Clang expression parser in LLDB requires the Clang resource directory to function.
+if (TARGET clang-resource-headers)
+  # If building alongside Clang, just add a dependency to ensure it is built together with liblldb.
   add_dependencies(liblldb clang-resource-headers)
+else()
+  # In a standalone build create a symlink from the LLDB library directory that points to the
+  # resource directory in the Clang library directory. LLDB searches relative to its install path,
+  # and the symlink is created in the same relative path as the resource directory of Clang when
+  # building alongside Clang.
+  # When building the LLDB framework, this isn't necessary as there we copy everything we need into
+  # the framework (including the Clang resource directory).
+  if(NOT LLDB_BUILD_FRAMEWORK)
+    set(LLDB_CLANG_RESOURCE_DIR_PARENT "$<TARGET_FILE_DIR:liblldb>/clang")
+    file(MAKE_DIRECTORY "${LLDB_CLANG_RESOURCE_DIR_PARENT}")
+    add_custom_command(TARGET liblldb POST_BUILD
+      COMMENT "Linking Clang resource dir into LLDB build directory: ${LLDB_CLANG_RESOURCE_DIR_PARENT}"
+      COMMAND ${CMAKE_COMMAND} -E make_directory "${LLDB_CLANG_RESOURCE_DIR_PARENT}"
+      COMMAND ${CMAKE_COMMAND} -E create_symlink "${LLDB_EXTERNAL_CLANG_RESOURCE_DIR}"
+              "${LLDB_CLANG_RESOURCE_DIR_PARENT}/${LLDB_CLANG_RESOURCE_DIR_NAME}"
+    )
+  endif()
 endif()
 
 if(LLDB_BUILD_FRAMEWORK)
From 3dffec03b0f68be8d77b0545a0eef795a6662cd3 Mon Sep 17 00:00:00 2001
From: Muhammad Omair Javaid
Date: Tue, 6 Oct 2020 12:27:27 +0500
Subject: [PATCH 107/321] [LLDB] Add QEMU testing environment setup guide for
 SVE testing

This patch adds a HowTo document to the lldb docs which gives instructions for
setting up a virtual environment based on the QEMU emulator for LLDB testing.

Instructions in this document are tested on Arm and AArch64 targets but can
easily be duplicated for other targets supported by QEMU.

This helps test LLDB in the absence of modern AArch64 features that have not
yet been released in publicly available hardware.
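As a rough end-to-end sketch of how the helper scripts fit together (the image
name below follows rootfs.sh's <distro>-<arch>-rootfs naming; adjust paths and
options for your setup), a session could look like:

    $ bash setup.sh --qemu --kernel arm64
    $ bash rootfs.sh --arch arm64 --distro focal --size 2G
    $ sudo bash run-qemu.sh --arch arm64 --sve --rootfs focal-arm64-rootfs.img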
Reviewed By: labath

Differential Revision: https://reviews.llvm.org/D82064
---
 lldb/docs/index.rst                     |   1 +
 lldb/docs/resources/test.rst            |   9 ++
 lldb/docs/use/qemu-testing.rst          | 135 +++++++++++++++++++
 lldb/scripts/lldb-test-qemu/rootfs.sh   |  98 +++++++++++++++
 lldb/scripts/lldb-test-qemu/run-qemu.sh | 112 ++++++++++++++++++
 lldb/scripts/lldb-test-qemu/setup.sh    | 151 ++++++++++++++++++++++++
 6 files changed, 506 insertions(+)
 create mode 100644 lldb/docs/use/qemu-testing.rst
 create mode 100644 lldb/scripts/lldb-test-qemu/rootfs.sh
 create mode 100644 lldb/scripts/lldb-test-qemu/run-qemu.sh
 create mode 100644 lldb/scripts/lldb-test-qemu/setup.sh

diff --git a/lldb/docs/index.rst b/lldb/docs/index.rst
index 909089f3cebe4..2bd66c9f49e16 100644
--- a/lldb/docs/index.rst
+++ b/lldb/docs/index.rst
@@ -133,6 +133,7 @@ interesting areas to contribute to lldb.
    use/python
    use/python-reference
    use/remote
+   use/qemu-testing
    use/troubleshooting
 
 .. toctree::
diff --git a/lldb/docs/resources/test.rst b/lldb/docs/resources/test.rst
index 6f39a45d4b728..61db746532548 100644
--- a/lldb/docs/resources/test.rst
+++ b/lldb/docs/resources/test.rst
@@ -360,6 +360,15 @@ Currently, running the remote test suite is supported only with ``dotest.py`` (or
 dosep.py with a single thread), but we expect this issue to be addressed in the
 near future.
 
+Running tests in QEMU System Emulation Environment
+``````````````````````````````````````````````````
+
+QEMU can be used to test LLDB in an emulation environment in the absence of
+actual hardware. The `QEMU based testing <https://lldb.llvm.org/use/qemu-testing.html>`_
+page describes how to set up an emulation environment using the QEMU helper scripts
+found under llvm-project/lldb/scripts/lldb-test-qemu. These scripts currently
+work with Arm or AArch64, but support for other architectures can be added easily.
+
 Debugging Test Failures
 -----------------------
 
diff --git a/lldb/docs/use/qemu-testing.rst b/lldb/docs/use/qemu-testing.rst
new file mode 100644
index 0000000000000..a82dfb23a16a4
--- /dev/null
+++ b/lldb/docs/use/qemu-testing.rst
@@ -0,0 +1,135 @@
+Testing LLDB using QEMU
+=======================
+
+.. contents::
+   :local:
+
+QEMU system mode emulation
+--------------------------
+
+QEMU can be used to test LLDB in an emulation environment in the absence of
+actual hardware. This page describes how to set up a QEMU emulation
+environment for testing LLDB.
+
+The scripts under llvm-project/lldb/scripts/lldb-test-qemu can quickly set up
+a virtual LLDB testing environment using QEMU. The scripts currently work
+with Arm or AArch64, but support for other architectures can be added easily.
+
+* **setup.sh** is used to build the Linux kernel image and QEMU system emulation executable(s) from source.
+* **rootfs.sh** is used to generate Ubuntu root file system images to be used for QEMU system mode emulation.
+* **run-qemu.sh** utilizes QEMU to boot a Linux kernel image with a root file system image.
+
+Once we have booted our kernel we can run lldb-server in the emulation
+environment. An Ubuntu Bionic/Focal x86_64 host was used to test the scripts
+and instructions in this document; please adjust them according to your host
+distribution/architecture.
+
+.. note::
+  Instructions on this page and the QEMU helper scripts are verified on an Ubuntu Bionic/Focal (x86_64) host. Moreover, the scripts require sudo/root permissions for installing dependencies and setting up the QEMU host/guest network.
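+
+As a quick sanity check after the guest boots (the commands below are
+illustrative and may vary by image), confirm the guest architecture and, for
+AArch64 SVE runs, that the kernel advertises the SVE feature:
+::
+
+  $ uname -m
+  $ grep -o sve /proc/cpuinfo | head -n1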
+
+Given below are some examples of common use-cases of the LLDB QEMU testing
+helper scripts:
+
+Create Ubuntu root file system image for QEMU system emulation with rootfs.sh
+--------------------------------------------------------------------------------
+
+**Example:** generate Ubuntu Bionic (armhf) rootfs image of size 1 GB
+::
+
+  $ bash rootfs.sh --arch armhf --distro bionic --size 1G
+
+**Example:** generate Ubuntu Focal (arm64) rootfs image of size 2 GB
+::
+
+  $ bash rootfs.sh --arch arm64 --distro focal --size 2G
+
+rootfs.sh has been tested for generating Ubuntu Bionic and Focal images, but it can also be used to generate rootfs images of other Debian-based Linux distributions.
+
+rootfs.sh defaults the username of the generated image to your current username on the host computer.
+
+
+Build QEMU or cross compile the Linux kernel from source using setup.sh
+-----------------------------------------------------------------------
+
+**Example:** Build QEMU binaries and Arm/AArch64 Linux kernel images
+::
+
+$ bash setup.sh --qemu --kernel arm
+$ bash setup.sh --qemu --kernel arm64
+
+**Example:** Build Linux kernel image only
+::
+
+$ bash setup.sh --kernel arm
+$ bash setup.sh --kernel arm64
+
+**Example:** Build qemu-system-arm and qemu-system-aarch64 binaries.
+::
+
+$ bash setup.sh --qemu
+
+**Example:** Remove qemu.git, linux.git and linux.build from the working directory
+::
+
+$ bash setup.sh --clean
+
+
+Run QEMU Arm or AArch64 system emulation using run-qemu.sh
+----------------------------------------------------------
+run-qemu.sh has the following dependencies:
+
+* Follow https://wiki.qemu.org/Documentation/Networking/NAT and set up bridge
+  networking for QEMU.
+
+* Make sure the /etc/qemu-ifup script is available with executable permissions.
+
+* QEMU binaries must be built from source using setup.sh or provided via the
+  --qemu command-line argument.
+
+* The Linux kernel image must be built from source using setup.sh or provided
+  via the --kernel command-line argument.
+
+* The linux.build and qemu.git folders must be present in the current directory
+  if setup.sh was used to build the Linux kernel and QEMU binaries.
+
+* The --sve option will enable AArch64 SVE mode.
+
+
+**Example:** Run QEMU Arm or AArch64 system emulation using run-qemu.sh
+::
+
+  $ sudo bash run-qemu.sh --arch arm --rootfs <path of rootfs image>
+  $ sudo bash run-qemu.sh --arch arm64 --rootfs <path of rootfs image>
+
+**Example:** Run QEMU with a kernel image and QEMU binary provided on the command line
+::
+
+  $ sudo bash run-qemu.sh --arch arm64 --rootfs <path of rootfs image> \
+  --kernel <path of kernel image> --qemu <path of QEMU binary>
+
+
+Steps for running lldb-server in QEMU system emulation environment
+------------------------------------------------------------------
+
+* Make sure bridge networking is enabled between the host machine and QEMU VM
+
+* Find out the IP address assigned to eth0 in the emulation environment
+
+* Set up ssh access between the host machine and the emulation environment
+
+* Log in to the emulation environment and install dependencies
+
+::
+
+  $ sudo apt install python-dev libedit-dev libncurses5-dev libexpat1-dev
+
+* Cross compile LLDB server for AArch64 Linux: please visit https://lldb.llvm.org/resources/build.html for instructions on how to cross compile LLDB server.
+ +* Transfer LLDB server executable to emulation environment + +:: + + $ scp lldb-server username@ip-address-of-emulation-environment:/home/username + +* Run lldb-server inside QEMU VM + +* Try connecting to lldb-server running inside QEMU VM with selected ip:port diff --git a/lldb/scripts/lldb-test-qemu/rootfs.sh b/lldb/scripts/lldb-test-qemu/rootfs.sh new file mode 100644 index 0000000000000..0491f4be0bc2b --- /dev/null +++ b/lldb/scripts/lldb-test-qemu/rootfs.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +set -e + +print_usage() { + echo "Usage:" + echo "Usage: $(basename $0) [options]" + echo -e "Creates a Ubuntu root file system image.\n" + echo -e " --help\t\t\tDisplay this information." + echo -e " --arch {armhf|arm64}\t\tSelects architecture of rootfs image." + echo -e " --distro {bionic|focal}\tSelects Ubuntu distribution of rootfs image." + echo -e " --size n{K|M|G}\t\tSets size of rootfs image to n Kilo, Mega or Giga bytes." + exit "$1" +} + +invalid_arg() { + echo "ERROR: Unrecognized argument: $1" >&2 + print_usage 1 +} + +update_repositories() { + echo -e "\nUpdating apt repositories. " + echo -e "\nPress 'y' to continue or any other key to exit..." + read -s -n 1 user_input + if [[ $user_input == 'Y' ]] || [[ $user_input == 'y' ]]; then + sudo apt update + else + exit + fi +} + +# Parse options +while [[ $# -gt 0 ]]; do + case "${END_OF_OPT}${1}" in + --help) print_usage 0 ;; + --arch) rfs_arch=$2; shift;; + --distro) rfs_distro=$2; shift;; + --size) rfs_size=$2; shift;; + *) invalid_arg "$1" ;; + esac + shift +done + +if [ -z "$rfs_arch" ]; then + echo "Missing architecture" + print_usage 1 +fi +if [ -z "$rfs_distro" ]; then + echo "Missing distribution" + print_usage 1 +fi +if [ -z "$rfs_size" ]; then + echo "Missing size" + print_usage 1 +fi + +if [[ "$rfs_arch" != "arm64" && "$rfs_arch" != "armhf" ]]; then + echo "Invalid architecture: $rfs_arch" + print_usage 1 +fi + +pat='^[0-9]+[K|M|G]$' +if [[ ! $rfs_size =~ $pat ]]; then + echo "Invalid size: $rfs_size" + print_usage 1 +fi + +update_repositories + +echo "Installing build dependencies ..." +sudo apt-get install debootstrap qemu-user-static schroot qemu-utils + +image_name=$rfs_distro-$rfs_arch-"rootfs" +echo "Creating $rfs_distro ($rfs_arch) root file system ..." +echo "Image name: $image_name.img" +echo "Image size: $rfs_size" + +qemu-img create $image_name.img $rfs_size + +mkfs.ext4 $image_name.img +mkdir $image_name.dir +sudo mount -o loop $image_name.img $image_name.dir + +sudo qemu-debootstrap --arch $rfs_arch $rfs_distro $image_name.dir + +sudo chroot $image_name.dir locale-gen en_US.UTF-8 + +sudo chroot $image_name.dir sed -i \ +'s/main/main restricted multiverse universe/g' /etc/apt/sources.list + +sudo chroot $image_name.dir sed -i '$ a\nameserver 8.8.8.8' /etc/resolv.conf + +sudo chroot $image_name.dir apt update +sudo chroot $image_name.dir apt -y install ssh bash-completion +sudo chroot $image_name.dir adduser --gecos "" $USER +sudo chroot $image_name.dir adduser $USER sudo +sudo umount $image_name.dir +rmdir $image_name.dir diff --git a/lldb/scripts/lldb-test-qemu/run-qemu.sh b/lldb/scripts/lldb-test-qemu/run-qemu.sh new file mode 100644 index 0000000000000..cb28b7aaf6420 --- /dev/null +++ b/lldb/scripts/lldb-test-qemu/run-qemu.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +print_usage() { + echo "Usage: $(basename $0) --arch [arm|arm64] [options]" + echo -e "Starts QEMU system mode emulation for the architecture.\n" + echo -e " --help\t\t\tDisplay this information." 
+ echo -e " --arch {arm|arm64}\t\tSelects architecture QEMU system emulation." + echo -e " --sve {path}\t\t\tEnables AArch64 SVE mode.\n" + echo -e " --rootfs {path}\t\tPath of root file system image." + echo -e " --qemu {path}\t\t\tPath of pre-installed qemu-system-* executable." + echo -e " --kernel {path}\t\tPath of Linux kernel prebuilt image.\n" + echo -e "By default this utility will use:" + echo -e " QEMU image built from source in qemu.git directory" + echo -e " Linux kernel image from linux.build/(arm or arm64) directory." + echo -e "Custom Linux kernel image or QEMU binary can be provided using commandline." + exit "$1" +} + +invalid_arg() { + echo "ERROR: Unrecognized argument: $1" >&2 + print_usage 1 +} + +run_qemu() { + QEMU_CORES=2 + QEMU_MEMORY=1024 + + $QEMU_BIN \ + -cpu $QEMU_CPU \ + -m $QEMU_MEMORY \ + -smp $QEMU_CORES \ + -kernel $KERNEL_IMG \ + -machine $QEMU_MACHINE \ + -drive file=$ROOTFS_IMG,if=none,format=raw,id=hd0 \ + -device virtio-blk-device,drive=hd0 \ + -append "root=/dev/vda rw ip=dhcp mem=1024M raid=noautodetect \ + crashkernel=128M rootwait console=ttyAMA0 devtmpfs.mount=0" \ + -netdev type=tap,id=net0 \ + -device virtio-net-device,netdev=net0 \ + -nographic +} + +# Parse options +while [[ $# -gt 0 ]]; do + case "${END_OF_OPT}${1}" in + --arch) ARCH=$2; shift;; + --rootfs) ROOTFS_IMG=$2; shift;; + --kernel) KERNEL_IMG=$2; shift;; + --qemu) QEMU_BIN=$2; shift;; + --sve) SVE=1;; + --help) print_usage 0 ;; + *) invalid_arg "$1" ;; + esac + shift +done + +if [ "$ARCH" == "arm64" ] && [ "$ARCH" == "arm" ]; then + echo "Invalid architecture: $ARCH" + print_usage 1 +fi + +if [[ ! -f "$ROOTFS_IMG" ]]; then + echo "No root file system image image available for emulation." + exit +fi + +if [[ ! -f "$KERNEL_IMG" ]]; then + KERNEL_IMG_PATH=$(pwd)/linux.build/"$ARCH"/arch/"$ARCH"/boot/ + + if [[ ! -d "$KERNEL_IMG_PATH" ]]; then + echo "No Linux kernel image available for emulation." + exit + fi + + if [[ "$ARCH" == "arm" ]]; then + KERNEL_IMG=$KERNEL_IMG_PATH/zImage + elif [[ "$ARCH" == "arm64" ]]; then + KERNEL_IMG=$KERNEL_IMG_PATH/Image + fi +fi + +if [[ ! -f "$QEMU_BIN" ]]; then + if [[ "$ARCH" == "arm" ]]; then + QEMU_BIN=$(pwd)/qemu.git/arm-softmmu/qemu-system-arm + elif [[ "$ARCH" == "arm64" ]]; then + QEMU_BIN=$(pwd)/qemu.git/aarch64-softmmu/qemu-system-aarch64 + fi + + if [[ ! -f "$QEMU_BIN" ]]; then + echo "QEMU $ARCH system emulation executable not found." + exit + fi +fi + +if [[ "$ARCH" == "arm" ]]; then + QEMU_MACHINE="virt,highmem=off" + QEMU_CPU="cortex-a15" + + if [[ $SVE ]]; then + echo "warning: --sve is supported by AArch64 targets only" + fi +elif [[ "$ARCH" == "arm64" ]]; then + QEMU_MACHINE=virt + QEMU_SVE_MAX_VQ=4 + QEMU_CPU="cortex-a53" + + if [[ $SVE ]]; then + QEMU_CPU="max,sve-max-vq=$QEMU_SVE_MAX_VQ" + fi +fi + +run_qemu diff --git a/lldb/scripts/lldb-test-qemu/setup.sh b/lldb/scripts/lldb-test-qemu/setup.sh new file mode 100644 index 0000000000000..c89dc16820007 --- /dev/null +++ b/lldb/scripts/lldb-test-qemu/setup.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +print_usage() { + echo "Usage: $(basename $0) [options]" + echo -e "Builds QEMU and Linux kernel from source.\n" + echo -e " --help\t\t\tDisplay this information." + echo -e " --kernel {arm|arm64}\t\tBuild Linux kernel for the architecture." + echo -e " --qemu\t\t\tBuild QEMU from source." + echo -e " --clean\t\t\tRemove qemu.git and linux.git directories in current directory." + exit "$1" +} + +update_repositories() { + echo -e "\nUpdating apt repositories. 
" + echo -e "\nPress 'y' to continue or any other key to exit..." + read -s -n 1 user_input + if [[ $user_input == 'Y' ]] || [[ $user_input == 'y' ]]; then + sudo apt update + else + exit + fi +} + +check_dir_exists() { + user_input= + if [ -d "$1" ]; then + echo -e "\n$1 already exists in working directory and will not be updated." + echo -e "\nPress 'y' to continue or any other key to exit..." + read -s -n 1 user_input + if [[ $user_input != 'Y' ]] && [[ $user_input != 'y' ]]; then + exit + fi + fi +} + +invalid_arg() { + echo "ERROR: Unrecognized argument: $1" >&2 + print_usage 1 +} + +build_qemu() { + echo "Installing QEMU build dependencies ..." + sudo apt install git python3-dev libsdl1.2-dev build-essential libpixman-1-dev + + # Checkout source code + check_dir_exists "qemu.git" + if [ ! -d "qemu.git" ]; then + git clone --depth 1 git://git.qemu.org/qemu.git qemu.git + fi + + cd qemu.git + # We are going to build QEMU Arm and AArch64 system mode emulation. + # ./configure --help emits a list of other possible targets supported by QEMU. + ./configure --target-list=arm-softmmu,aarch64-softmmu + make -j`getconf _NPROCESSORS_ONLN` +} + +build_linux() { + echo "Installing Linux kernel build dependencies ..." + sudo apt install git bison flex build-essential libssl-dev bc + + check_dir_exists "linux.git" + + if [ ! -d "linux.git" ]; then + git clone --depth 1 \ + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git linux.git + fi + + cd linux.git + make mrproper + + if [[ "$1" == "arm" ]]; then + echo "Installing gcc-arm-linux-gnueabihf ..." + sudo apt install gcc-arm-linux-gnueabihf + + # Configure kernel_branch=master arch=arm config=vexpress_defconfig + make O=../linux.build/arm ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- \ + vexpress_defconfig + + # Trigger Arm kernel build + make -j`getconf _NPROCESSORS_ONLN` O=../linux.build/arm ARCH=arm \ + CROSS_COMPILE=arm-linux-gnueabihf- + elif [[ "$1" == "arm64" ]]; then + echo "Installing gcc-aarch64-linux-gnu ..." + sudo apt install gcc-aarch64-linux-gnu + + # Configure kernel_branch=master arch=arm64 config=defconfig + make O=../linux.build/arm64 ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \ + defconfig + + # Trigger AArch64 kernel build + make -j`getconf _NPROCESSORS_ONLN` O=../linux.build/arm64 ARCH=arm64 \ + CROSS_COMPILE=aarch64-linux-gnu- + else + echo "ERROR: Unrecognized architecture: $1" >&2 + print_usage 1 + exit + fi +} + +clean() { + if [ -d "linux.git" ]; then + echo "Removing linux.git ..." + rm -rf linux.git + fi + + if [ -d "linux.build" ]; then + echo "Removing linux.build ..." + rm -rf linux.build + fi + + if [ -d "qemu.git" ]; then + echo "Removing qemu.git ..." + rm -rf qemu.git + fi + + exit +} + +# Parse options +while [[ $# -gt 0 ]]; do + case "${END_OF_OPT}${1}" in + -h|--help) print_usage 0 ;; + -k|--kernel) + if [ "$2" == "arm64" ] || [ "$2" == "arm" ]; then + KERNEL_ARCH=$2 + else + invalid_arg "$2" + fi + shift;; + -q|--qemu) + QEMU=1;; + -c|--clean) clean ;; + *) invalid_arg "$1" ;; + esac + shift +done + +update_repositories + +if [ "$KERNEL_ARCH" != "" ]; then + pushd . + build_linux $KERNEL_ARCH + popd +fi + +if [[ $QEMU -eq 1 ]]; then + pushd . + build_qemu + popd +fi From 25692b7765e2364896a0136f5b54dde3de2dd563 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Tue, 6 Oct 2020 08:45:53 +0100 Subject: [PATCH 108/321] [flang][NFC] Remove redundant `;` Sadly this has been causing gcc-10 builds to fail. 
---
 flang/include/flang/Frontend/TextDiagnosticPrinter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/include/flang/Frontend/TextDiagnosticPrinter.h b/flang/include/flang/Frontend/TextDiagnosticPrinter.h
index b67731d843522..754ebf323d4cb 100644
--- a/flang/include/flang/Frontend/TextDiagnosticPrinter.h
+++ b/flang/include/flang/Frontend/TextDiagnosticPrinter.h
@@ -23,7 +23,7 @@ namespace clang {
 class DiagnosticOptions;
 class DiagnosticsEngine;
 
-}; // namespace clang
+} // namespace clang
 
 using llvm::IntrusiveRefCntPtr;
 using llvm::raw_ostream;
From 4a8c70c3194aeeb286b4bea1666168f710cbe04f Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache
Date: Tue, 6 Oct 2020 07:59:02 +0000
Subject: [PATCH 109/321] [mlir][Linalg] Reintroduced missing verification
 check

A verification check on the number of indexing maps seems to have been dropped
inadvertently. Also update the relevant roundtrip tests.
---
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 6 ++++++
 mlir/test/Dialect/Linalg/roundtrip.mlir  | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 26aa75955e3c1..082078dee3afc 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -398,6 +398,12 @@ static LogicalResult verifyGenericOp(GenericOpType op) {
     expectedNumSymbols = op.getShapedType(index).getRank();
   }
 
+  if (op.indexing_maps().size() != op.getNumInputsAndOutputs())
+    return op.emitOpError("expected the number of indexing_map (")
+           << op.indexing_maps().size()
+           << ") to be equal to the number of inputs and outputs ("
+           << op.getNumInputsAndOutputs() << ")";
+
   SmallVector<AffineMap, 4> indexingMaps;
   indexingMaps.reserve(op.indexing_maps().size());
   for (auto en : llvm::enumerate(op.indexing_maps())) {
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index 868cabb5eff35..b4347ca898877 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -343,6 +343,7 @@ func @generic_with_tensor_input(%arg0: tensor<?x?xvector<3x4xi4>>,
 
 #accesses = [
   affine_map<(i, j, k) -> (j, i)>,
+  affine_map<(i, j, k) -> (i, k, i + j)>,
   affine_map<(i, j, k) -> (i, k, i + j)>
 ]
 
@@ -377,6 +378,7 @@ func @generic_with_tensor_input_and_output(
 
 #accesses = [
   affine_map<(i, j, k) -> (j, i)>,
+  affine_map<(i, j, k) -> (i, k, i + j)>,
  affine_map<(i, j, k) -> (i, k, i + j)>
 ]
From 77d3b14458dd2aaafa20e6172a893cf0abab6453 Mon Sep 17 00:00:00 2001
From: Haojian Wu
Date: Tue, 6 Oct 2020 10:17:47 +0200
Subject: [PATCH 110/321] [clangd] Fix an inconsistent ReasonToReject enum
 usage, NFC.

---
 clang-tools-extra/clangd/refactor/Rename.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp
index c48bc2856cb7d..9de3302564fd5 100644
--- a/clang-tools-extra/clangd/refactor/Rename.cpp
+++ b/clang-tools-extra/clangd/refactor/Rename.cpp
@@ -113,7 +113,7 @@ bool isExcluded(const NamedDecl &RenameDecl) {
   return StdSymbols->count(printQualifiedName(RenameDecl));
 }
 
-enum ReasonToReject {
+enum class ReasonToReject {
   NoSymbolFound,
   NoIndexProvided,
   NonIndexable,
@@ -206,7 +206,7 @@ llvm::Error makeError(ReasonToReject Reason) {
     return "symbol may be used in other files (not eligible for indexing)";
   case ReasonToReject::UnsupportedSymbol:
     return "symbol is not a supported kind (e.g. namespace, macro)";
-  case AmbiguousSymbol:
+  case ReasonToReject::AmbiguousSymbol:
     return "there are multiple symbols at the given location";
   }
   llvm_unreachable("unhandled reason kind");
From b81bedf7146ad5a163e9f1d7283c83ffa4e2043f Mon Sep 17 00:00:00 2001
From: George Mitenkov
Date: Tue, 6 Oct 2020 11:20:10 +0300
Subject: [PATCH 111/321] [MLIR][SPIRVToLLVM] Conversion for composite extract
 and insert

A pattern to convert `spv.CompositeInsert` and `spv.CompositeExtract`.

In LLVM, there are 2 ops that correspond to each instruction, depending on the
container type. If the container type is a vector type, then the result of
conversion is `llvm.insertelement` or `llvm.extractelement`. If the container
type is an aggregate type (i.e. struct, array), the result of conversion is
`llvm.insertvalue` or `llvm.extractvalue`.

Reviewed By: mravishankar

Differential Revision: https://reviews.llvm.org/D88205
---
 .../SPIRVToLLVM/ConvertSPIRVToLLVM.cpp        | 61 +++++++++++++++++++
 .../SPIRVToLLVM/misc-ops-to-llvm.mlir         | 38 ++++++++++++
 2 files changed, 99 insertions(+)

diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp
index 2c11116d295f8..6a5ea1bb06c91 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp
+++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp
@@ -555,6 +555,66 @@ class BranchConditionalConversionPattern
   }
 };
 
+/// Converts `spv.CompositeExtract` to `llvm.extractvalue` if the container type
+/// is an aggregate type (struct or array). Otherwise, converts to
+/// `llvm.extractelement` that operates on vectors.
+class CompositeExtractPattern
+    : public SPIRVToLLVMConversion<spirv::CompositeExtractOp> {
+public:
+  using SPIRVToLLVMConversion<spirv::CompositeExtractOp>::SPIRVToLLVMConversion;
+
+  LogicalResult
+  matchAndRewrite(spirv::CompositeExtractOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto dstType = this->typeConverter.convertType(op.getType());
+    if (!dstType)
+      return failure();
+
+    Type containerType = op.composite().getType();
+    if (containerType.isa<VectorType>()) {
+      Location loc = op.getLoc();
+      IntegerAttr value = op.indices()[0].cast<IntegerAttr>();
+      Value index = createI32ConstantOf(loc, rewriter, value.getInt());
+      rewriter.replaceOpWithNewOp<LLVM::ExtractElementOp>(
+          op, dstType, op.composite(), index);
+      return success();
+    }
+    rewriter.replaceOpWithNewOp<LLVM::ExtractValueOp>(
+        op, dstType, op.composite(), op.indices());
+    return success();
+  }
+};
+
+/// Converts `spv.CompositeInsert` to `llvm.insertvalue` if the container type
+/// is an aggregate type (struct or array). Otherwise, converts to
+/// `llvm.insertelement` that operates on vectors.
+class CompositeInsertPattern
+    : public SPIRVToLLVMConversion<spirv::CompositeInsertOp> {
+public:
+  using SPIRVToLLVMConversion<spirv::CompositeInsertOp>::SPIRVToLLVMConversion;
+
+  LogicalResult
+  matchAndRewrite(spirv::CompositeInsertOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto dstType = this->typeConverter.convertType(op.getType());
+    if (!dstType)
+      return failure();
+
+    Type containerType = op.composite().getType();
+    if (containerType.isa<VectorType>()) {
+      Location loc = op.getLoc();
+      IntegerAttr value = op.indices()[0].cast<IntegerAttr>();
+      Value index = createI32ConstantOf(loc, rewriter, value.getInt());
+      rewriter.replaceOpWithNewOp<LLVM::InsertElementOp>(
+          op, dstType, op.composite(), op.object(), index);
+      return success();
+    }
+    rewriter.replaceOpWithNewOp<LLVM::InsertValueOp>(
+        op, dstType, op.composite(), op.object(), op.indices());
+    return success();
+  }
+};
+
 /// Converts SPIR-V operations that have straightforward LLVM equivalent
 /// into LLVM dialect operations.
 template <typename SPIRVOp, typename LLVMOp>
@@ -1360,6 +1420,7 @@ void mlir::populateSPIRVToLLVMConversionPatterns(
       VariablePattern,
 
       // Miscellaneous ops
+      CompositeExtractPattern, CompositeInsertPattern,
       DirectConversionPattern<spirv::SelectOp, LLVM::SelectOp>,
       DirectConversionPattern<spirv::UndefOp, LLVM::UndefOp>,
diff --git a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir
index e114cea6c96fa..700c991463ce3 100644
--- a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir
+++ b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir
@@ -1,5 +1,43 @@
 // RUN: mlir-opt -convert-spirv-to-llvm %s | FileCheck %s
 
+//===----------------------------------------------------------------------===//
+// spv.CompositeExtract
+//===----------------------------------------------------------------------===//
+
+// CHECK-LABEL: @composite_extract_array
+spv.func @composite_extract_array(%arg: !spv.array<4x!spv.array<4xf32>>) "None" {
+  // CHECK: llvm.extractvalue %{{.*}}[1 : i32, 3 : i32] : !llvm.array<4 x array<4 x float>>
+  %0 = spv.CompositeExtract %arg[1 : i32, 3 : i32] : !spv.array<4x!spv.array<4xf32>>
+  spv.Return
+}
+
+// CHECK-LABEL: @composite_extract_vector
+spv.func @composite_extract_vector(%arg: vector<3xf32>) "None" {
+  // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
+  // CHECK: llvm.extractelement %{{.*}}[%[[ZERO]] : !llvm.i32] : !llvm.vec<3 x float>
+  %0 = spv.CompositeExtract %arg[0 : i32] : vector<3xf32>
+  spv.Return
+}
+
+//===----------------------------------------------------------------------===//
+// spv.CompositeInsert
+//===----------------------------------------------------------------------===//
+
+// CHECK-LABEL: @composite_insert_struct
+spv.func @composite_insert_struct(%arg0: i32, %arg1: !spv.struct<i32, !spv.array<4xi32>>) "None" {
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1 : i32, 3 : i32] : !llvm.struct<(i32, array<4 x i32>)>
+  %0 = spv.CompositeInsert %arg0, %arg1[1 : i32, 3 : i32] : i32 into !spv.struct<i32, !spv.array<4xi32>>
+  spv.Return
+}
+
+// CHECK-LABEL: @composite_insert_vector
+spv.func @composite_insert_vector(%arg0: vector<3xf32>, %arg1: f32) "None" {
+  // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32
+  // CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%[[ONE]] : !llvm.i32] : !llvm.vec<3 x float>
+  %0 = spv.CompositeInsert %arg1, %arg0[1 : i32] : f32 into vector<3xf32>
+  spv.Return
+}
+
 //===----------------------------------------------------------------------===//
 // spv.Select
 //===----------------------------------------------------------------------===//
From 9fc535f987b4fff62ec659d965b8ba1d371977fe Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer
Date: Tue, 6 Oct 2020 10:45:31 +0200
Subject: [PATCH 112/321] [AMDGPU] Fix gcc warnings

uint8_t types are implicitly promoted to int, leading to an unsigned-signed
comparison.

Thanks for the heads-up @uabelho.

Differential Revision: https://reviews.llvm.org/D88876
---
 llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index fa3130ab3fe01..f141ca196a9ad 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3942,8 +3942,10 @@ static void packImageA16AddressToDwords(
       // derivatives dx/dh and dx/dv are packed with undef.
       if (((I + 1) >= EndIdx) ||
           ((Intr->NumGradients / 2) % 2 == 1 &&
-           (I == Intr->GradientStart + (Intr->NumGradients / 2) - 1 ||
-            I == Intr->GradientStart + Intr->NumGradients - 1)) ||
+           (I == static_cast<unsigned>(Intr->GradientStart +
+                                       (Intr->NumGradients / 2) - 1) ||
+            I == static_cast<unsigned>(Intr->GradientStart +
+                                       Intr->NumGradients - 1))) ||
           // Check for _L to _LZ optimization
           !MI.getOperand(ArgOffset + I + 1).isReg()) {
         PackedAddrs.push_back(
From 48a82c42457b47936d8e1f4b2af6a1f3cbb6e992 Mon Sep 17 00:00:00 2001
From: Haojian Wu
Date: Mon, 5 Oct 2020 14:02:59 +0200
Subject: [PATCH 113/321] [clangd] Verify the diagnostic code in include-fixer
 diagnostic tests, NFC.

Make it easier to spot which diagnostics in the include-fixer list are tested.

Differential Revision: https://reviews.llvm.org/D88828
---
 .../clangd/unittests/DiagnosticsTests.cpp | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp
index 6974131562b32..8fd1fe385a1c8 100644
--- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp
@@ -720,13 +720,16 @@ int main() {
       UnorderedElementsAre(
           AllOf(Diag(Test.range("nested"),
                      "incomplete type 'ns::X' named in nested name specifier"),
+                DiagName("incomplete_nested_name_spec"),
                 WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n",
                             "Add include \"x.h\" for symbol ns::X"))),
          AllOf(Diag(Test.range("base"), "base class has incomplete type"),
+                DiagName("incomplete_base_class"),
                 WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n",
                             "Add include \"x.h\" for symbol ns::X"))),
          AllOf(Diag(Test.range("access"),
                      "member access into incomplete type 'ns::X'"),
+                DiagName("incomplete_member_access"),
                 WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n",
                             "Add include \"x.h\" for symbol ns::X")))));
 }
@@ -789,19 +792,23 @@ void bar() {
       TU.build().getDiagnostics(),
       UnorderedElementsAre(
          AllOf(Diag(Test.range("unqualified1"), "unknown type name 'X'"),
+                DiagName("unknown_typename"),
                 WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n",
                             "Add include \"x.h\" for symbol ns::X"))),
          Diag(Test.range("unqualified2"), "use of undeclared identifier 'X'"),
          AllOf(Diag(Test.range("qualified1"),
                      "no type named 'X' in namespace 'ns'"),
+                DiagName("typename_nested_not_found"),
                 WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n",
                             "Add include \"x.h\" for symbol ns::X"))),
          AllOf(Diag(Test.range("qualified2"),
                      "no member named 'X' in namespace 'ns'"),
+                DiagName("no_member"),
                 WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n",
                             "Add include \"x.h\" for symbol ns::X"))),
          AllOf(Diag(Test.range("global"),
                      "no type named 'Global' in the global namespace"),
+                DiagName("typename_nested_not_found"),
                 WithFix(Fix(Test.range("insert"), "#include \"global.h\"\n",
                             "Add include \"global.h\" for symbol Global")))));
 }
@@ -825,6 +832,7 @@ void foo() {
   EXPECT_THAT(TU.build().getDiagnostics(),
               UnorderedElementsAre(AllOf(
                   Diag(Test.range("unqualified"), "unknown type name 'X'"),
+                  DiagName("unknown_typename"),
                   WithFix(Fix(Test.range("insert"), "#include \"a.h\"\n",
                               "Add include \"a.h\" for symbol na::X"),
                           Fix(Test.range("insert"), "#include \"b.h\"\n",
@@ -905,6 +913,7 @@ void g() { ns::$[[scope]]::X_Y(); }
       TU.build().getDiagnostics(),
       UnorderedElementsAre(AllOf(
          Diag(Test.range(), "no member named 'scope' in namespace 'ns'"),
+          DiagName("no_member"),
          WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n",
                      "Add include \"x.h\" for
symbol ns::scope::X_Y"))))); } @@ -934,22 +943,26 @@ void f() { AllOf( Diag(Test.range("q1"), "use of undeclared identifier 'clangd'; " "did you mean 'clang'?"), + DiagName("undeclared_var_use_suggest"), WithFix(_, // change clangd to clang Fix(Test.range("insert"), "#include \"x.h\"\n", "Add include \"x.h\" for symbol clang::clangd::X"))), AllOf( Diag(Test.range("x"), "no type named 'X' in namespace 'clang'"), + DiagName("typename_nested_not_found"), WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n", "Add include \"x.h\" for symbol clang::clangd::X"))), AllOf( Diag(Test.range("q2"), "use of undeclared identifier 'clangd'; " "did you mean 'clang'?"), + DiagName("undeclared_var_use_suggest"), WithFix( - _, // change clangd to clangd + _, // change clangd to clang Fix(Test.range("insert"), "#include \"y.h\"\n", "Add include \"y.h\" for symbol clang::clangd::ns::Y"))), AllOf(Diag(Test.range("ns"), "no member named 'ns' in namespace 'clang'"), + DiagName("no_member"), WithFix(Fix( Test.range("insert"), "#include \"y.h\"\n", "Add include \"y.h\" for symbol clang::clangd::ns::Y"))))); @@ -971,6 +984,7 @@ namespace c { EXPECT_THAT(TU.build().getDiagnostics(), UnorderedElementsAre(AllOf( Diag(Test.range(), "no type named 'X' in namespace 'a'"), + DiagName("typename_nested_not_found"), WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n", "Add include \"x.h\" for symbol a::X"))))); } From f1ceaa200f9364ff6ca66a930872b19d99e5a339 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 30 Sep 2020 16:23:01 +0300 Subject: [PATCH 114/321] [llvm-readobj/elf][test] - Stop using precompiled binaries in mips-got.test This removed 2 last precompiled binaries from the mips-got.test. YAML descriptions are used instead. Differential revision: https://reviews.llvm.org/D88565 --- .../ELF/Inputs/got-empty.exe.mipsel | Bin 9400 -> 0 bytes .../ELF/Inputs/got-static.exe.mips | Bin 1168 -> 0 bytes .../test/tools/llvm-readobj/ELF/mips-got.test | 237 ++++++++++-------- 3 files changed, 137 insertions(+), 100 deletions(-) delete mode 100644 llvm/test/tools/llvm-readobj/ELF/Inputs/got-empty.exe.mipsel delete mode 100644 llvm/test/tools/llvm-readobj/ELF/Inputs/got-static.exe.mips diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/got-empty.exe.mipsel b/llvm/test/tools/llvm-readobj/ELF/Inputs/got-empty.exe.mipsel deleted file mode 100644 index b57874557c8795bb79faf8d44399d9d538ac3b9e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9400 zcmeHNJ#Q015FN*agpddmA_yoF2c$>=mk&aVF2M5@jmHkD#E0e*nKAytn6d%qayO&5ZOiGdr_0doSy1=gmfKvs5aHlMxvgdLK)H ztU`ajDKc7-WK}#FlLeWQ2|0^(##V(nf&73?i~{388JIzl8Pp)A;1g#6)+TZZpaOY3 zA{N{)m3|yrDuD%S66`yOd$7I0!luG?ag>uIG&w?V;vIZ~{TUkrSm(6*%vo&5jm37; z&-VOfrk4BQ-jlO4a=N%2*1C*t)J~>pR9B?BET)~^x$B!&W_;60(2b#SeA6NtO;Wo; z2O_ikjN%($&+X^mGLLT~X9k+zUWJ~3z6zWN_=(p5%3QAGBJ=_Bcn-!d0pu2~61GFV zgx`sJ3I8{!XMOBT%2-Ff)wIb!_gFsZt0)7?fHI&ACv+L#J^Yb%u~{amhv3jg)mOqW>9FbkjGzON1@fwlFfNo+ge|%gLaqPGW&M_Sn!*f#G^zZ{Yr8 I&55h`2fOU*e*gdg diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/got-static.exe.mips b/llvm/test/tools/llvm-readobj/ELF/Inputs/got-static.exe.mips deleted file mode 100644 index a369e32bd8834049bd921c9bebc5608682741365..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1168 zcmb7EO=}ZT6umQ@rl~HPf=Hn*MsN|j7_D|A1(^vXB19{+3petZ*vS~sOvt=KU0HYT z{5$@P?)o>fUkorW<)Z-25+GL#wFQ$M_l~=(iz!r%lF5nl0W1* zSh8*B@0zpS^4LG0i?=;TJ?A||zbCAB{SNQ&j-R(3nt31Y#6PZZy`NMvtg2_C%897q zX|8kf=lt0hrD~l|H3GUghf;&O_O8O@s@MIaPS#gPz20&6M4e=vepiVq#-}Jkwn$Qc 
z`>OK+w4Pf1*;{YlygyFz!D#p;FKbDrML8M|D@n|-q^z_gc-cv=^>j2i(*?Tym+VeN z_D~Vx4!IKqzYQ<^zR35G*MMf;B{Z>|$M;440^mMy2WHSj40vfd<0UkgYR$e2vtRsX zyfBYFUsdz@AMZp7->dIIx8FVLkPa&W{tJQ>_6BedU@z8f0!up?YVc2h7vB%7wUT!Q K{>~>so4mgcXmMWv diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-got.test b/llvm/test/tools/llvm-readobj/ELF/mips-got.test index 400e0a2736a07..1615ce0b0e1bb 100644 --- a/llvm/test/tools/llvm-readobj/ELF/mips-got.test +++ b/llvm/test/tools/llvm-readobj/ELF/mips-got.test @@ -33,8 +33,9 @@ Sections: 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, ## Global entry 1 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB ] ## Global entry 2 Size: [[SIZE=]] - - Name: .dynamic - Type: SHT_DYNAMIC + - Name: .dynamic + Type: SHT_DYNAMIC + ShType: [[DYNAMICTYPE=]] Entries: - Tag: DT_MIPS_LOCAL_GOTNO Value: 4 @@ -129,6 +130,70 @@ DynamicSymbols: # RUN: llvm-readelf -A %t.other.entries2.exe 2>&1 | FileCheck %s -DOTHERNUM=2 \ # RUN: --check-prefixes=BASIC-GNU,BASIC-GNU-OTHER --strict-whitespace --match-full-lines +## Check how we print static GOT. Our code assumes that the GOT is static when there +## is no .dynamic section, so we change its type to hide it. + +# RUN: yaml2obj --docnum=1 -DDYNAMICTYPE=SHT_PROGBITS %s -o %t.got-static.exe +# RUN: llvm-readobj -A %t.got-static.exe | FileCheck %s --check-prefix=STATIC-GOT-LLVM +# RUN: llvm-readelf -A %t.got-static.exe | FileCheck %s --check-prefix=STATIC-GOT-GNU + +# STATIC-GOT-LLVM: Static GOT { +# STATIC-GOT-LLVM-NEXT: Canonical gp value: 0x9112 +# STATIC-GOT-LLVM-NEXT: Reserved entries [ +# STATIC-GOT-LLVM-NEXT: Entry { +# STATIC-GOT-LLVM-NEXT: Address: 0x1122 +# STATIC-GOT-LLVM-NEXT: Access: -32752 +# STATIC-GOT-LLVM-NEXT: Initial: 0x0 +# STATIC-GOT-LLVM-NEXT: Purpose: Lazy resolver +# STATIC-GOT-LLVM-NEXT: } +# STATIC-GOT-LLVM-NEXT: Entry { +# STATIC-GOT-LLVM-NEXT: Address: 0x112A +# STATIC-GOT-LLVM-NEXT: Access: -32744 +# STATIC-GOT-LLVM-NEXT: Initial: 0xFFFFFFFFFFFFFFFF +# STATIC-GOT-LLVM-NEXT: Purpose: Module pointer (GNU extension) +# STATIC-GOT-LLVM-NEXT: } +# STATIC-GOT-LLVM-NEXT: ] +# STATIC-GOT-LLVM-NEXT: Local entries [ +# STATIC-GOT-LLVM-NEXT: Entry { +# STATIC-GOT-LLVM-NEXT: Address: 0x1132 +# STATIC-GOT-LLVM-NEXT: Access: -32736 +# STATIC-GOT-LLVM-NEXT: Initial: 0x1111111111111111 +# STATIC-GOT-LLVM-NEXT: } +# STATIC-GOT-LLVM-NEXT: Entry { +# STATIC-GOT-LLVM-NEXT: Address: 0x113A +# STATIC-GOT-LLVM-NEXT: Access: -32728 +# STATIC-GOT-LLVM-NEXT: Initial: 0x2222222222222222 +# STATIC-GOT-LLVM-NEXT: } +# STATIC-GOT-LLVM-NEXT: Entry { +# STATIC-GOT-LLVM-NEXT: Address: 0x1142 +# STATIC-GOT-LLVM-NEXT: Access: -32720 +# STATIC-GOT-LLVM-NEXT: Initial: 0xAAAAAAAAAAAAAAAA +# STATIC-GOT-LLVM-NEXT: } +# STATIC-GOT-LLVM-NEXT: Entry { +# STATIC-GOT-LLVM-NEXT: Address: 0x114A +# STATIC-GOT-LLVM-NEXT: Access: -32712 +# STATIC-GOT-LLVM-NEXT: Initial: 0xBBBBBBBBBBBBBBBB +# STATIC-GOT-LLVM-NEXT: } +# STATIC-GOT-LLVM-NEXT: ] +# STATIC-GOT-LLVM-NEXT: } + +# STATIC-GOT-GNU: Static GOT: +# STATIC-GOT-GNU-NEXT: Canonical gp value: 0000000000009112 +# STATIC-GOT-GNU-EMPTY: +# STATIC-GOT-GNU-NEXT: Reserved entries: +# STATIC-GOT-GNU-NEXT: Address Access Initial Purpose +# STATIC-GOT-GNU-NEXT: 0000000000001122 -32752(gp) 0000000000000000 Lazy resolver +# STATIC-GOT-GNU-NEXT: 000000000000112a -32744(gp) ffffffffffffffff Module pointer (GNU extension) +# STATIC-GOT-GNU-EMPTY: +# STATIC-GOT-GNU-NEXT: Local entries: +# STATIC-GOT-GNU-NEXT: Address Access Initial +# STATIC-GOT-GNU-NEXT: 0000000000001132 -32736(gp) 1111111111111111 +# STATIC-GOT-GNU-NEXT: 000000000000113a 
-32728(gp) 2222222222222222 +# STATIC-GOT-GNU-NEXT: 0000000000001142 -32720(gp) aaaaaaaaaaaaaaaa +# STATIC-GOT-GNU-NEXT: 000000000000114a -32712(gp) bbbbbbbbbbbbbbbb +# STATIC-GOT-GNU-EMPTY: +# STATIC-GOT-GNU-NOT: {{.}} + ## Check how we dump 32-bit inputs. # RUN: yaml2obj --docnum=2 %s -o %t.got32.exe @@ -238,109 +303,81 @@ DynamicSymbols: - Name: bar Type: STT_FUNC -# RUN: llvm-readobj -A %p/Inputs/got-empty.exe.mipsel | \ -# RUN: FileCheck %s -check-prefix GOT-EMPTY -# RUN: llvm-readobj -A %p/Inputs/got-static.exe.mips | \ -# RUN: FileCheck %s -check-prefix GOT-STATIC - -# RUN: llvm-readelf -A %p/Inputs/got-empty.exe.mipsel | \ -# RUN: FileCheck %s --strict-whitespace -check-prefix GNU-GOT-EMPTY -# RUN: llvm-readelf -A %p/Inputs/got-static.exe.mips | \ -# RUN: FileCheck %s --strict-whitespace -check-prefix GNU-GOT-STATIC - -# GOT-EMPTY: Primary GOT { -# GOT-EMPTY-NEXT: Canonical gp value: 0x409FF0 -# GOT-EMPTY-NEXT: Reserved entries [ -# GOT-EMPTY-NEXT: Entry { -# GOT-EMPTY-NEXT: Address: 0x402000 -# GOT-EMPTY-NEXT: Access: -32752 -# GOT-EMPTY-NEXT: Initial: 0x0 -# GOT-EMPTY-NEXT: Purpose: Lazy resolver -# GOT-EMPTY-NEXT: } -# GOT-EMPTY-NEXT: Entry { -# GOT-EMPTY-NEXT: Address: 0x402004 -# GOT-EMPTY-NEXT: Access: -32748 -# GOT-EMPTY-NEXT: Initial: 0x80000000 -# GOT-EMPTY-NEXT: Purpose: Module pointer (GNU extension) -# GOT-EMPTY-NEXT: } -# GOT-EMPTY-NEXT: ] -# GOT-EMPTY-NEXT: Local entries [ -# GOT-EMPTY-NEXT: ] -# GOT-EMPTY-NEXT: Global entries [ -# GOT-EMPTY-NEXT: ] -# GOT-EMPTY-NEXT: Number of TLS and multi-GOT entries: 2 -# GOT-EMPTY-NEXT: } - -# GOT-STATIC: Static GOT { -# GOT-STATIC-NEXT: Canonical gp value: 0x418100 -# GOT-STATIC-NEXT: Reserved entries [ -# GOT-STATIC-NEXT: Entry { -# GOT-STATIC-NEXT: Address: 0x410110 -# GOT-STATIC-NEXT: Access: -32752 -# GOT-STATIC-NEXT: Initial: 0x0 -# GOT-STATIC-NEXT: Purpose: Lazy resolver -# GOT-STATIC-NEXT: } -# GOT-STATIC-NEXT: Entry { -# GOT-STATIC-NEXT: Address: 0x410114 -# GOT-STATIC-NEXT: Access: -32748 -# GOT-STATIC-NEXT: Initial: 0x80000000 -# GOT-STATIC-NEXT: Purpose: Module pointer (GNU extension) -# GOT-STATIC-NEXT: } -# GOT-STATIC-NEXT: ] -# GOT-STATIC-NEXT: Local entries [ -# GOT-STATIC-NEXT: Entry { -# GOT-STATIC-NEXT: Address: 0x410118 -# GOT-STATIC-NEXT: Access: -32744 -# GOT-STATIC-NEXT: Initial: 0x400000 -# GOT-STATIC-NEXT: } -# GOT-STATIC-NEXT: Entry { -# GOT-STATIC-NEXT: Address: 0x41011C -# GOT-STATIC-NEXT: Access: -32740 -# GOT-STATIC-NEXT: Initial: 0x400100 -# GOT-STATIC-NEXT: } -# GOT-STATIC-NEXT: Entry { -# GOT-STATIC-NEXT: Address: 0x410120 -# GOT-STATIC-NEXT: Access: -32736 -# GOT-STATIC-NEXT: Initial: 0x400104 -# GOT-STATIC-NEXT: } -# GOT-STATIC-NEXT: ] -# GOT-STATIC-NEXT: } - -# GNU-GOTY : Primary GOT: -# GNU-GOT-EMPTY: Canonical gp value: 00409ff0 - -# GNU-GOTY : Reserved entries: -# GNU-GOT-EMPTY: Address Access Initial Purpose -# GNU-GOT-EMPTY: 00402000 -32752(gp) 00000000 Lazy resolver -# GNU-GOT-EMPTY: 00402004 -32748(gp) 80000000 Module pointer (GNU extension) - -# GNU-GOT-STATIC: Static GOT: -# GNU-GOT-STATIC-NEXT: Canonical gp value: 00418100 - -# GNU-GOT-STATIC: Reserved entries: -# GNU-GOT-STATIC-NEXT: Address Access Initial Purpose -# GNU-GOT-STATIC-NEXT: 00410110 -32752(gp) 00000000 Lazy resolver -# GNU-GOT-STATIC-NEXT: 00410114 -32748(gp) 80000000 Module pointer (GNU extension) - -# GNU-GOT-STATIC: Local entries: -# GNU-GOT-STATIC-NEXT: Address Access Initial -# GNU-GOT-STATIC-NEXT: 00410118 -32744(gp) 00400000 -# GNU-GOT-STATIC-NEXT: 0041011c -32740(gp) 00400100 -# GNU-GOT-STATIC-NEXT: 00410120 
-32736(gp) 00400104 +## Check what we print when there are no local nor global GOT entries. + +# RUN: yaml2obj --docnum=3 %s -o %t.got-noentries.exe +# RUN: llvm-readobj -A %t.got-noentries.exe | FileCheck %s --check-prefix=GOT-EMPTY-LLVM +# RUN: llvm-readelf -A %t.got-noentries.exe | FileCheck %s --check-prefix=GOT-EMPTY-GNU + +# GOT-EMPTY-LLVM: Primary GOT { +# GOT-EMPTY-LLVM-NEXT: Canonical gp value: 0x9112 +# GOT-EMPTY-LLVM-NEXT: Reserved entries [ +# GOT-EMPTY-LLVM-NEXT: Entry { +# GOT-EMPTY-LLVM-NEXT: Address: 0x1122 +# GOT-EMPTY-LLVM-NEXT: Access: -32752 +# GOT-EMPTY-LLVM-NEXT: Initial: 0x0 +# GOT-EMPTY-LLVM-NEXT: Purpose: Lazy resolver +# GOT-EMPTY-LLVM-NEXT: } +# GOT-EMPTY-LLVM-NEXT: Entry { +# GOT-EMPTY-LLVM-NEXT: Address: 0x112A +# GOT-EMPTY-LLVM-NEXT: Access: -32744 +# GOT-EMPTY-LLVM-NEXT: Initial: 0xFFFFFFFFFFFFFFFF +# GOT-EMPTY-LLVM-NEXT: Purpose: Module pointer (GNU extension) +# GOT-EMPTY-LLVM-NEXT: } +# GOT-EMPTY-LLVM-NEXT: ] +# GOT-EMPTY-LLVM-NEXT: Local entries [ +# GOT-EMPTY-LLVM-NEXT: ] +# GOT-EMPTY-LLVM-NEXT: Global entries [ +# GOT-EMPTY-LLVM-NEXT: ] +# GOT-EMPTY-LLVM-NEXT: Number of TLS and multi-GOT entries: 0 +# GOT-EMPTY-LLVM-NEXT: } + +# GOT-EMPTY-GNU: Primary GOT: +# GOT-EMPTY-GNU-NEXT: Canonical gp value: 0000000000009112 +# GOT-EMPTY-GNU-EMPTY: +# GOT-EMPTY-GNU-NEXT: Reserved entries: +# GOT-EMPTY-GNU-NEXT: Address Access Initial Purpose +# GOT-EMPTY-GNU-NEXT: 0000000000001122 -32752(gp) 0000000000000000 Lazy resolver +# GOT-EMPTY-GNU-NEXT: 000000000000112a -32744(gp) ffffffffffffffff Module pointer (GNU extension) +# GOT-EMPTY-GNU-NOT: {{.}} + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_MIPS +Sections: + - Name: .got + Type: SHT_PROGBITS + Address: 0x1122 + ContentArray: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ## Lazy resolver. + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF ] ## Module pointer (GNU extension) + Size: [[SIZE=]] + - Name: .dynamic + Type: SHT_DYNAMIC + Entries: + - Tag: DT_MIPS_LOCAL_GOTNO + Value: 2 + - Tag: DT_MIPS_GOTSYM + Value: 1 + - Tag: DT_PLTGOT + Value: 0x1122 +DynamicSymbols: [] ## Check we report errors when dynamic tags, needed for dumping GOT, are missing. 
-# RUN: yaml2obj --docnum=3 -DTAG1=DT_MIPS_LOCAL_GOTNO -DTAG2=DT_MIPS_GOTSYM %s -o %t.err1.o +# RUN: yaml2obj --docnum=4 -DTAG1=DT_MIPS_LOCAL_GOTNO -DTAG2=DT_MIPS_GOTSYM %s -o %t.err1.o # RUN: not llvm-readobj -A %t.err1.o 2>&1 | FileCheck %s -DFILE=%t.err1.o -check-prefix ERR1 # ERR1: error: '[[FILE]]': cannot find PLTGOT dynamic tag -# RUN: yaml2obj --docnum=3 -DTAG1=DT_PLTGOT -DTAG2=DT_MIPS_GOTSYM %s -o %t.err2.o +# RUN: yaml2obj --docnum=4 -DTAG1=DT_PLTGOT -DTAG2=DT_MIPS_GOTSYM %s -o %t.err2.o # RUN: not llvm-readobj -A %t.err2.o 2>&1 | FileCheck %s -DFILE=%t.err2.o -check-prefix ERR2 # ERR2: error: '[[FILE]]': cannot find MIPS_LOCAL_GOTNO dynamic tag -# RUN: yaml2obj --docnum=3 -DTAG1=DT_PLTGOT -DTAG2=DT_MIPS_LOCAL_GOTNO %s -o %t.err3.o +# RUN: yaml2obj --docnum=4 -DTAG1=DT_PLTGOT -DTAG2=DT_MIPS_LOCAL_GOTNO %s -o %t.err3.o # RUN: not llvm-readobj -A %t.err3.o 2>&1 | FileCheck %s -DFILE=%t.err3.o -check-prefix ERR3 # ERR3: error: '[[FILE]]': cannot find MIPS_GOTSYM dynamic tag @@ -363,12 +400,12 @@ Sections: Value: 0 DynamicSymbols: [] -# RUN: yaml2obj --docnum=4 -DVAL1=0xffff %s -o %t.err4.o +# RUN: yaml2obj --docnum=5 -DVAL1=0xffff %s -o %t.err4.o # RUN: not llvm-readobj -A %t.err4.o 2>&1 | FileCheck %s -DFILE=%t.err4.o -check-prefix=ERR4 # ERR4: error: '[[FILE]]': DT_MIPS_GOTSYM value (65535) exceeds the number of dynamic symbols (1) -# RUN: yaml2obj --docnum=4 -DVAL2=0xffff %s -o %t.err5.o +# RUN: yaml2obj --docnum=5 -DVAL2=0xffff %s -o %t.err5.o # RUN: not llvm-readobj -A %t.err5.o 2>&1 | FileCheck %s -DFILE=%t.err5.o -check-prefix=ERR5 # ERR5: error: '[[FILE]]': there is no non-empty GOT section at 0xffff @@ -392,7 +429,7 @@ Sections: DynamicSymbols: [] ## Check that we do not report a warning about the .got section when we are able to locate it by name. -# RUN: yaml2obj --docnum=5 -DNAME=0xffff %s -o %t.err6.o +# RUN: yaml2obj --docnum=6 -DNAME=0xffff %s -o %t.err6.o # RUN: llvm-readobj -A %t.err6.o 2>&1 | \ # RUN: FileCheck %s -DFILE=%t.err6.o -check-prefix=NAME-ERR-FOUND --implicit-check-not=warning: # RUN: llvm-readelf -A %t.err6.o 2>&1 | \ @@ -417,7 +454,7 @@ Sections: ShName: [[NAME=]] ## Check we report a warning when we are unable to find the .got section due to an error. -# RUN: yaml2obj --docnum=5 -DGOTNAME=0xffff %s -o %t.err7.o +# RUN: yaml2obj --docnum=6 -DGOTNAME=0xffff %s -o %t.err7.o # RUN: llvm-readelf -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: # RUN: llvm-readobj -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: @@ -427,7 +464,7 @@ Sections: ## sections with the same address as the .got section. ## In this test the empty .data section has the same address as the .got section. -# RUN: yaml2obj --docnum=6 %s -o %t.err7.o +# RUN: yaml2obj --docnum=7 %s -o %t.err7.o # RUN: llvm-readobj -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=SAME-ADDR-LLVM # RUN: llvm-readelf -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=SAME-ADDR-GNU @@ -496,7 +533,7 @@ DynamicSymbols: - Name: foo ## Check how we print global GOT entries when they are unnamed section symbols. 
-# RUN: yaml2obj --docnum=7 %s -o %t.err8.o
+# RUN: yaml2obj --docnum=8 %s -o %t.err8.o
 # RUN: llvm-readobj -A %t.err8.o 2>&1 | FileCheck %s -DFILE=%t.err8.o --check-prefix=SEC-SYMS-LLVM
 # RUN: llvm-readelf -A %t.err8.o 2>&1 | FileCheck %s -DFILE=%t.err8.o --check-prefix=SEC-SYMS-GNU
From cef0de5eb59dde6369645d37883f393354c99acd Mon Sep 17 00:00:00 2001
From: Mauri Mustonen
Date: Tue, 6 Oct 2020 10:11:20 +0100
Subject: [PATCH 115/321] [VPlan] Add vplan native path vectorization test
 case for inner loop reduction

Regarding this bug I posted earlier:
https://bugs.llvm.org/show_bug.cgi?id=47035

After reading through the LLVM source code and getting familiar with VPlan, I
was able to vectorize the code by enabling the VPlan native path. After
talking with @fhahn, he suggested that I contribute this as a test case, so
here it is. I tried to follow the available guides as best I could. I modified
the IR code by hand to use clearer variable names instead of numbers.

One thing I'd like input on: are the current CHECK lines sufficient to verify
that the inner loop has been vectorized properly?

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D87564
---
 .../vplan-vectorize-inner-loop-reduction.ll   | 82 +++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll

diff --git a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll
new file mode 100644
index 0000000000000..3870ab789f176
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll
@@ -0,0 +1,82 @@
+; RUN: opt -loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s
+
+; Vectorize an explicitly marked outer loop using the VPlan native path. The
+; inner loop contains a simple double add reduction. The IR was compiled from
+; the following C code and then modified by hand:
+; void inner_loop_reduction(const double* restrict in_a, const double* restrict in_b, double* restrict out)
+; {
+;   #pragma clang loop vectorize(enable)
+;   for (int i = 0; i < 1000; ++i) {
+;     double a = in_a[i];
+;     double b = in_b[i];
+;     for (int j = 0; j < 10000; ++j) {
+;       a = a + b;
+;     }
+;     out[i] = a;
+;   }
+; }
+define void @inner_loop_reduction(double* noalias nocapture readonly %a.in, double* noalias nocapture readonly %b.in, double* noalias nocapture %c.out) {
+; CHECK-LABEL: @inner_loop_reduction(
+
+; CHECK: vector.body:
+; CHECK-NEXT: %[[FOR1_INDEX:.*]] = phi i64 [ 0, %[[LABEL_PR:.*]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH:.*]] ]
+; CHECK: %[[VEC_INDEX:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[LABEL_PR]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH]] ]
+; CHECK-NEXT: %[[A_PTR:.*]] = getelementptr inbounds double, double* %a.in, <4 x i64> %[[VEC_INDEX]]
+; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %[[A_PTR]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
+; CHECK-NEXT: %[[B_PTR:.*]] = getelementptr inbounds double, double* %b.in, <4 x i64> %[[VEC_INDEX]]
+; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %[[B_PTR]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
+; CHECK-NEXT: br label %[[FOR2_HEADER:.*]]
+
+; CHECK: [[FOR2_HEADER]]:
+; CHECK-NEXT: %[[FOR2_INDEX:.*]] = phi <4 x i32> [ %[[FOR2_INDEX_NEXT:.*]], %[[FOR2_HEADER]] ], [ zeroinitializer, %vector.body ]
+; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT:.*]], %[[FOR2_HEADER]] ], [ %[[MASKED_GATHER1]], %vector.body ]
+; CHECK-NEXT: %[[REDUCTION_NEXT]] = fadd <4 x double> %[[MASKED_GATHER2]], %[[REDUCTION]]
+; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], <i32 10000, i32 10000, i32 10000, i32 10000>
+; CHECK-NEXT: %[[EXIT_COND:.*]] = extractelement <4 x i1> %[[VEC_PTR]], i32 0
+; CHECK-NEXT: br i1 %[[EXIT_COND]], label %[[FOR1_LATCH:.*]], label %{{.*}}
+
+; CHECK: [[FOR1_LATCH]]:
+; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT]], %[[FOR2_HEADER]] ]
+; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, double* %c.out, <4 x i64> %[[VEC_INDEX]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %[[REDUCTION]], <4 x double*> %[[C_PTR]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: %[[VEC_INDEX_NEXT:.*]] = add nuw nsw <4 x i64> %[[VEC_INDEX]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i64> %[[VEC_INDEX_NEXT]], <i64 1000, i64 1000, i64 1000, i64 1000>
+; CHECK-NEXT: %{{.*}} = extractelement <4 x i1> %[[VEC_PTR]], i32 0
+; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add i64 %[[FOR1_INDEX]], 4
+; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body
+
+entry:
+  br label %for1.header
+
+for1.header:                                      ; preds = %entry
+  %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ]
+  %a.ptr = getelementptr inbounds double, double* %a.in, i64 %indvar1
+  %a = load double, double* %a.ptr, align 8
+  %b.ptr = getelementptr inbounds double, double* %b.in, i64 %indvar1
+  %b = load double, double* %b.ptr, align 8
+  br label %for2.header
+
+for2.header:                                      ; preds = %for1.header, %for2.header
+  %indvar2 = phi i32 [ 0, %for1.header ], [ %indvar21, %for2.header ]
+  %a.reduction = phi double [ %a, %for1.header ], [ %a.reduction1, %for2.header ]
+  %a.reduction1 = fadd double %b, %a.reduction
+  %indvar21 = add 
nuw nsw i32 %indvar2, 1 + %for2.cond = icmp eq i32 %indvar21, 10000 + br i1 %for2.cond, label %for1.latch, label %for2.header + +for1.latch: ; preds = %for2.header + %c.ptr = getelementptr inbounds double, double* %c.out, i64 %indvar1 + store double %a.reduction1, double* %c.ptr, align 8 + %indvar11 = add nuw nsw i64 %indvar1, 1 + %for1.cond = icmp eq i64 %indvar11, 1000 + br i1 %for1.cond, label %exit, label %for1.header, !llvm.loop !0 + +exit: ; preds = %for1.latch + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.enable", i1 true} From 8ed7946a7d94f9d23b7f33356a1903d481daa5a0 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Tue, 6 Oct 2020 11:28:55 +0200 Subject: [PATCH 116/321] Revert "[llvm-objcopy][MachO] Add missing std::move." This reverts commit 6e25586990b93e2c9eaaa4f473b6720ccd646c46. It depends on 32c8435ef70031d7bd3dce48e41bdce65747e123, which I'm reverting due to ASan failures. Details in https://reviews.llvm.org/D88400. --- llvm/tools/llvm-objcopy/llvm-objcopy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp index e1f1ed8f3cc1b..8cd58572f5a19 100644 --- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp @@ -187,7 +187,7 @@ createNewArchiveMembers(CopyConfig &Config, const Archive &Ar) { } if (Err) return createFileError(Config.InputFilename, std::move(Err)); - return std::move(NewArchiveMembers); + return NewArchiveMembers; } } // end namespace objcopy From 80f66ac0d544d2d9d3108033148d60bb4760b319 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Tue, 6 Oct 2020 11:29:24 +0200 Subject: [PATCH 117/321] Revert "[llvm-objcopy][MachO] Add support for universal binaries" This reverts commit 32c8435ef70031d7bd3dce48e41bdce65747e123. It fails ASan, details in https://reviews.llvm.org/D88400. --- .../llvm/Object/MachOUniversalWriter.h | 6 -- llvm/lib/Object/MachOUniversalWriter.cpp | 5 -- .../tools/llvm-objcopy/MachO/strip-all.test | 5 -- .../llvm-objcopy/MachO/universal-object.test | 42 ----------- .../tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 72 ------------------- llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h | 4 -- llvm/tools/llvm-objcopy/llvm-objcopy.cpp | 26 +------ llvm/tools/llvm-objcopy/llvm-objcopy.h | 32 --------- 8 files changed, 3 insertions(+), 189 deletions(-) delete mode 100644 llvm/test/tools/llvm-objcopy/MachO/universal-object.test delete mode 100644 llvm/tools/llvm-objcopy/llvm-objcopy.h diff --git a/llvm/include/llvm/Object/MachOUniversalWriter.h b/llvm/include/llvm/Object/MachOUniversalWriter.h index 606db94c9f202..49352440dca17 100644 --- a/llvm/include/llvm/Object/MachOUniversalWriter.h +++ b/llvm/include/llvm/Object/MachOUniversalWriter.h @@ -43,12 +43,6 @@ class Slice { Slice(const MachOObjectFile &O, uint32_t Align); - /// This constructor takes prespecified \param CPUType, \param CPUSubType, - /// \param ArchName, \param Align instead of inferring them from the archive - /// memebers. 
-  Slice(const Archive &A, uint32_t CPUType, uint32_t CPUSubType,
-        std::string ArchName, uint32_t Align);
-
   static Expected<Slice> create(const Archive &A, LLVMContext *LLVMCtx = nullptr);
 
diff --git a/llvm/lib/Object/MachOUniversalWriter.cpp b/llvm/lib/Object/MachOUniversalWriter.cpp
index 4bb467e56a6f9..165964e077ce3 100644
--- a/llvm/lib/Object/MachOUniversalWriter.cpp
+++ b/llvm/lib/Object/MachOUniversalWriter.cpp
@@ -75,11 +75,6 @@ static uint32_t calculateAlignment(const MachOObjectFile &ObjectFile) {
   }
 }
 
-Slice::Slice(const Archive &A, uint32_t CPUType, uint32_t CPUSubType,
-             std::string ArchName, uint32_t Align)
-    : B(&A), CPUType(CPUType), CPUSubType(CPUSubType),
-      ArchName(std::move(ArchName)), P2Alignment(Align) {}
-
 Slice::Slice(const MachOObjectFile &O, uint32_t Align)
     : B(&O), CPUType(O.getHeader().cputype),
       CPUSubType(O.getHeader().cpusubtype),
diff --git a/llvm/test/tools/llvm-objcopy/MachO/strip-all.test b/llvm/test/tools/llvm-objcopy/MachO/strip-all.test
index cb41b353ec53c..4ff31f5c1e422 100644
--- a/llvm/test/tools/llvm-objcopy/MachO/strip-all.test
+++ b/llvm/test/tools/llvm-objcopy/MachO/strip-all.test
@@ -27,11 +27,6 @@
 # cmp %t4 %t.dwarf.stripped
 # cmp %t5 %t.dwarf.stripped
 
-# RUN: llvm-lipo %t.dwarf -create -output %t.dwarf.universal
-# RUN: llvm-strip %t.dwarf.universal -o %t.dwarf.universal.stripped
-# RUN: llvm-lipo %t.dwarf.universal.stripped -thin x86_64 -output %t6
-# RUN: cmp %t6 %t.dwarf.stripped
-
 ## Make sure that debug sections are removed.
 # DWARF: Sections [
 # DWARF-NOT: Name: __debug_str
diff --git a/llvm/test/tools/llvm-objcopy/MachO/universal-object.test b/llvm/test/tools/llvm-objcopy/MachO/universal-object.test
deleted file mode 100644
index a6146fd56483a..0000000000000
--- a/llvm/test/tools/llvm-objcopy/MachO/universal-object.test
+++ /dev/null
@@ -1,42 +0,0 @@
-# This test verifies that llvm-objcopy copies a univeral Mach-O object file properly.
-
-# RUN: yaml2obj %p/Inputs/i386.yaml -o %t.i386
-# RUN: yaml2obj %p/Inputs/x86_64.yaml -o %t.x86_64
-
-## Case 1: copy a universal object containing regular Mach-O objects.
-# RUN: llvm-lipo %t.i386 %t.x86_64 -create -output %t.universal
-# RUN: llvm-objcopy %t.universal %t.universal.copy
-# RUN: llvm-lipo %t.universal.copy -archs | FileCheck --check-prefix=VERIFY_ARCHS %s
-# RUN: llvm-lipo %t.universal.copy -thin i386 -output %t.i386.copy
-# RUN: llvm-lipo %t.universal.copy -thin x86_64 -output %t.x86_64.copy
-# RUN: cmp %t.i386 %t.i386.copy
-# RUN: cmp %t.x86_64 %t.x86_64.copy
-
-## Case 2: copy a universal object file containing an archive.
-# RUN: rm -f %t.archive.i386
-# RUN: llvm-ar cr %t.archive.i386 %t.i386
-# RUN: llvm-lipo %t.archive.i386 %t.x86_64 -create -output %t.universal.containing.archive
-# RUN: llvm-objcopy %t.universal.containing.archive %t.universal.containing.archive.copy
-# RUN: llvm-lipo %t.universal.containing.archive.copy -archs | FileCheck --check-prefix=VERIFY_ARCHS %s
-# RUN: llvm-lipo %t.universal.containing.archive.copy -thin i386 -output %t.archive.i386.copy
-# RUN: llvm-lipo %t.universal.containing.archive.copy -thin x86_64 -output %t.archive.x86_64.copy
-# RUN: cmp %t.archive.i386 %t.archive.i386.copy
-# RUN: cmp %t.x86_64 %t.archive.x86_64.copy
-
-## Case 3: copy an archive containing a universal object.
-# RUN: llvm-ar cr %t.archive.containing.universal %t.universal
-# RUN: llvm-objcopy %t.archive.containing.universal %t.archive.containing.universal.copy
-
-## Case 4: try to copy a universal object file contaning a bitcode slice.
-# RUN: echo 'target triple = "arm64-apple-ios8.0.0"' | llvm-as -o %t.bitcode
-# RUN: llvm-lipo %t.bitcode %t.x86_64 -create -output %t.universal.containing.bitcode
-# RUN: not llvm-objcopy %t.universal.containing.bitcode %t.universal.containing.bitcode.copy 2>&1 \
-# RUN: | FileCheck --check-prefix=UNSUPPORTED_UNIVERSAL_OBJECT %s
-
-## Case 5: try to copy an archive containing an unsupported universal object.
-# RUN: llvm-ar cr %t.archive.universal.bitcode %t.universal.containing.bitcode
-# RUN: not llvm-objcopy %t.archive.universal.bitcode %t.archive.universal.bitcode.copy 2>&1 \
-# RUN: | FileCheck --check-prefix=UNSUPPORTED_UNIVERSAL_OBJECT %s
-
-# VERIFY_ARCHS: i386 x86_64
-# UNSUPPORTED_UNIVERSAL_OBJECT: slice for 'arm64' of the universal Mach-O binary {{.*}} is not a Mach-O object or an archive
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 28b4ec655a2e1..47a08d33002af 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -8,13 +8,9 @@
 
 #include "MachOObjcopy.h"
 #include "../CopyConfig.h"
-#include "../llvm-objcopy.h"
 #include "MachOReader.h"
 #include "MachOWriter.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/Object/ArchiveWriter.h"
-#include "llvm/Object/MachOUniversal.h"
-#include "llvm/Object/MachOUniversalWriter.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 
@@ -390,74 +386,6 @@ Error executeObjcopyOnBinary(const CopyConfig &Config,
   return Writer.write();
 }
 
-Error executeObjcopyOnMachOUniversalBinary(CopyConfig &Config,
-                                           const MachOUniversalBinary &In,
-                                           Buffer &Out) {
-  SmallVector<OwningBinary<Binary>, 2> Binaries;
-  SmallVector<Slice, 2> Slices;
-  for (const auto &O : In.objects()) {
-    Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
-    if (ArOrErr) {
-      Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
-          createNewArchiveMembers(Config, **ArOrErr);
-      if (!NewArchiveMembersOrErr)
-        return NewArchiveMembersOrErr.takeError();
-      Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
-          writeArchiveToBuffer(*NewArchiveMembersOrErr,
-                               (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(),
-                               Config.DeterministicArchives,
-                               (*ArOrErr)->isThin());
-      if (!OutputBufferOrErr)
-        return OutputBufferOrErr.takeError();
-      Expected<std::unique_ptr<Binary>> BinaryOrErr =
-          object::createBinary(**OutputBufferOrErr);
-      if (!BinaryOrErr)
-        return BinaryOrErr.takeError();
-      Binaries.emplace_back(std::move(*BinaryOrErr),
-                            std::move(*OutputBufferOrErr));
-      Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
-                          O.getCPUType(), O.getCPUSubType(),
-                          O.getArchFlagName(), O.getAlign());
-      continue;
-    }
-    // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
-    // ObjectForArch return an Error in case of the type mismatch. We need to
-    // check each in turn to see what kind of slice this is, so ignore errors
-    // produced along the way.
-    consumeError(ArOrErr.takeError());
-
-    Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
-    if (!ObjOrErr) {
-      consumeError(ObjOrErr.takeError());
-      return createStringError(std::errc::invalid_argument,
-                               "slice for '%s' of the universal Mach-O binary "
-                               "'%s' is not a Mach-O object or an archive",
-                               O.getArchFlagName().c_str(),
-                               Config.InputFilename.str().c_str());
-    }
-    MemBuffer MB(O.getArchFlagName());
-    if (Error E = executeObjcopyOnBinary(Config, **ObjOrErr, MB))
-      return E;
-    std::unique_ptr<MemoryBuffer> OutputBuffer =
-        MB.releaseMemoryBuffer();
-    Expected<std::unique_ptr<Binary>> BinaryOrErr =
-        object::createBinary(*OutputBuffer);
-    if (!BinaryOrErr)
-      return BinaryOrErr.takeError();
-    Binaries.emplace_back(std::move(*BinaryOrErr), std::move(OutputBuffer));
-    Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
-                        O.getAlign());
-  }
-  Expected<std::unique_ptr<MemoryBuffer>> B =
-      writeUniversalBinaryToBuffer(Slices);
-  if (!B)
-    return B.takeError();
-  if (Error E = Out.allocate((*B)->getBufferSize()))
-    return E;
-  memcpy(Out.getBufferStart(), (*B)->getBufferStart(), (*B)->getBufferSize());
-  return Out.commit();
-}
-
 } // end namespace macho
 } // end namespace objcopy
 } // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
index c3f5391f79b6a..f34e361db7ea9 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
@@ -24,10 +24,6 @@ class Buffer;
 namespace macho {
 Error executeObjcopyOnBinary(const CopyConfig &Config,
                              object::MachOObjectFile &In, Buffer &Out);
-
-Error executeObjcopyOnMachOUniversalBinary(
-    CopyConfig &Config, const object::MachOUniversalBinary &In, Buffer &Out);
-
 } // end namespace macho
 } // end namespace objcopy
 } // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index 8cd58572f5a19..175f2929eb230 100644
--- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -25,7 +25,6 @@
 #include "llvm/Object/ELFTypes.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachO.h"
-#include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/Wasm.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
@@ -145,10 +144,6 @@ static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In,
     return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out);
   else if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In))
     return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out);
-  else if (auto *MachOUniversalBinary =
-               dyn_cast<object::MachOUniversalBinary>(&In))
-    return macho::executeObjcopyOnMachOUniversalBinary(
-        Config, *MachOUniversalBinary, Out);
   else if (auto *WasmBinary = dyn_cast<object::WasmObjectFile>(&In))
     return objcopy::wasm::executeObjcopyOnBinary(Config, *WasmBinary, Out);
   else
@@ -156,11 +151,7 @@ static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In,
                              "unsupported object file format");
 }
 
-namespace llvm {
-namespace objcopy {
-
-Expected<std::vector<NewArchiveMember>>
-createNewArchiveMembers(CopyConfig &Config, const Archive &Ar) {
+static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
   std::vector<NewArchiveMember> NewArchiveMembers;
   Error Err = Error::success();
   for (const Archive::Child &Child : Ar.children(Err)) {
@@ -175,7 +166,7 @@ createNewArchiveMembers(CopyConfig &Config, const Archive &Ar) {
     MemBuffer MB(ChildNameOrErr.get());
 
     if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MB))
-      return std::move(E);
+      return E;
 
     Expected<NewArchiveMember> Member =
         NewArchiveMember::getOldMember(Child, Config.DeterministicArchives);
@@ -187,7 +178,8 @@ createNewArchiveMembers(CopyConfig &Config, const Archive &Ar) {
   }
   if (Err)
     return createFileError(Config.InputFilename, std::move(Err));
-  return NewArchiveMembers;
-}
-
-} // end namespace objcopy
-} // end namespace llvm
 
-static Error executeObjcopyOnArchive(CopyConfig &Config,
-                                     const object::Archive &Ar) {
-  Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
-      createNewArchiveMembers(Config, Ar);
-  if (!NewArchiveMembersOrErr)
-    return NewArchiveMembersOrErr.takeError();
-  return deepWriteArchive(Config.OutputFilename, *NewArchiveMembersOrErr,
+  return deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
                           Ar.hasSymbolTable(), Ar.kind(),
                           Config.DeterministicArchives, Ar.isThin());
 }
diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.h b/llvm/tools/llvm-objcopy/llvm-objcopy.h
deleted file mode 100644
index 97a166769f954..0000000000000
--- a/llvm/tools/llvm-objcopy/llvm-objcopy.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===- llvm-objcopy.h -------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_OBJCOPY_OBJCOPY_H
-#define LLVM_TOOLS_OBJCOPY_OBJCOPY_H
-
-#include "llvm/Support/Error.h"
-
-namespace llvm {
-
-struct NewArchiveMember;
-
-namespace object {
-
-class Archive;
-
-} // end namespace object
-
-namespace objcopy {
-struct CopyConfig;
-Expected<std::vector<NewArchiveMember>>
-createNewArchiveMembers(CopyConfig &Config, const object::Archive &Ar);
-
-} // end namespace objcopy
-} // end namespace llvm
-
-#endif // LLVM_TOOLS_OBJCOPY_OBJCOPY_H

From fe2bd543f5e82bc14ef37dc5ec2228812098cf7a Mon Sep 17 00:00:00 2001
From: Tres Popp
Date: Tue, 22 Sep 2020 12:23:43 +0200
Subject: [PATCH 118/321] [mlir] Add file to implement bufferization for shape
 ops.

This adds a shape-bufferize pass and implements the pattern for
shape.assuming.

Differential Revision: https://reviews.llvm.org/D88083
---
 .../mlir/Dialect/Shape/Transforms/Passes.h    | 11 +++
 .../mlir/Dialect/Shape/Transforms/Passes.td   |  5 +
 .../Dialect/Shape/Transforms/CMakeLists.txt   |  1 +
 .../Shape/Transforms/ShapeTypeConversion.cpp  | 98 +++++++++++++++++++
 .../Dialect/Shape/shape-type-conversion.mlir  | 18 ++++
 5 files changed, 133 insertions(+)
 create mode 100644 mlir/lib/Dialect/Shape/Transforms/ShapeTypeConversion.cpp
 create mode 100644 mlir/test/Dialect/Shape/shape-type-conversion.mlir

diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h
index 543ffc617a5cb..72816b72f41e6 100644
--- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h
@@ -16,6 +16,10 @@
 
 #include "mlir/Pass/Pass.h"
 
+namespace mlir {
+class BufferAssignmentTypeConverter;
+} // namespace mlir
+
 namespace mlir {
 /// Creates an instance of the ShapeToShapeLowering pass that legalizes Shape
 /// dialect to be convertible to Standard. For example, `shape.num_elements` get
@@ -36,6 +40,13 @@ void populateRemoveShapeConstraintsPatterns(OwningRewritePatternList &patterns,
                                             MLIRContext *ctx);
 std::unique_ptr<Pass> createRemoveShapeConstraintsPass();
 
+void populateShapeTypeConversionPatterns(
+    MLIRContext *ctx, BufferAssignmentTypeConverter *converter,
+    OwningRewritePatternList *patterns);
+// Collects a set of patterns to replace tensors as inputs and outputs to shape
+// operations with buffers. This only modifies the shape operations.
+std::unique_ptr<Pass> createShapeTensorToMemrefPass();
+
 //===----------------------------------------------------------------------===//
 // Registration
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td
index 022bd3773ce22..09cc7a1a5c93c 100644
--- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td
@@ -21,4 +21,9 @@ def ShapeToShapeLowering : FunctionPass<"shape-to-shape-lowering"> {
   let constructor = "mlir::createShapeToShapeLowering()";
 }
 
+// TODO(tpopp): Generalize this to allow any type conversions desired.
+def ShapeTensorToMemref : FunctionPass<"shape-tensor-to-memref"> {
+  let summary = "Replace tensors involving shape operations with memrefs";
+  let constructor = "mlir::createShapeTensorToMemrefPass()";
+}
 #endif // MLIR_DIALECT_SHAPE_TRANSFORMS_PASSES
diff --git a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt
index 987f9c544b33b..9df40a0fb7404 100644
--- a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_mlir_dialect_library(MLIRShapeOpsTransforms
   RemoveShapeConstraints.cpp
+  ShapeTypeConversion.cpp
   ShapeToShapeLowering.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/mlir/lib/Dialect/Shape/Transforms/ShapeTypeConversion.cpp b/mlir/lib/Dialect/Shape/Transforms/ShapeTypeConversion.cpp
new file mode 100644
index 0000000000000..98398fbc70e64
--- /dev/null
+++ b/mlir/lib/Dialect/Shape/Transforms/ShapeTypeConversion.cpp
@@ -0,0 +1,98 @@
+//=====------- ShapeTypeConversion.cpp - Shape Type Conversions ----------*- C++
+//-*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines patterns to convert types of inputs and outputs to shape
+// operations to be memrefs instead of tensors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "mlir/Dialect/Shape/IR/Shape.h"
+#include "mlir/Dialect/Shape/Transforms/Passes.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/StandardTypes.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/BufferPlacement.h"
+
+using namespace mlir;
+using namespace mlir::shape;
+
+namespace {
+// Propagate tensor to memref conversions through shape.assuming ops.
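+// For example (mirroring the shape-type-conversion.mlir test added below), a
+// shape.assuming op such as
+//   %1 = shape.assuming %0 -> (tensor<2xf16>) { ... }
+// is rewritten so that its result type becomes a buffer:
+//   %1 = shape.assuming %0 -> (memref<2xf16>) { ... }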
+class TypeConversionAssumingOpConverter
+    : public BufferAssignmentOpConversionPattern<shape::AssumingOp> {
+public:
+  using BufferAssignmentOpConversionPattern<
+      shape::AssumingOp>::BufferAssignmentOpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(shape::AssumingOp assumingOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    SmallVector<Type, 2> newResultTypes;
+    newResultTypes.reserve(assumingOp.getNumResults());
+    for (auto result : assumingOp.getResults()) {
+      auto originalType = result.getType();
+      Type convertedType = converter->convertType(originalType);
+      newResultTypes.push_back(convertedType);
+    }
+
+    auto newAssumingOp = rewriter.create<shape::AssumingOp>(
+        assumingOp.getLoc(), newResultTypes, assumingOp.witness());
+
+    // Handle the region transfer carefully here to avoid assertions that both
+    // operations are valid at replacement time.
+    newAssumingOp.doRegion().push_back(new Block());
+    rewriter.replaceOp(assumingOp, newAssumingOp.getResults());
+    newAssumingOp.doRegion().takeBody(assumingOp.doRegion());
+
+    return success();
+  }
+};
+
+struct ShapeTensorToMemrefPass
+    : public ShapeTensorToMemrefBase<ShapeTensorToMemrefPass> {
+  void runOnFunction() override {
+    MLIRContext &ctx = getContext();
+
+    OwningRewritePatternList patterns;
+    BufferAssignmentTypeConverter converter;
+    populateShapeTypeConversionPatterns(&ctx, &converter, &patterns);
+
+    ConversionTarget target(getContext());
+    auto isMemRefType = [](Type type) { return type.isa<MemRefType>(); };
+
+    target.addDynamicallyLegalOp<shape::AssumingOp>([&](shape::AssumingOp op) {
+      return std::all_of(op.result_type_begin(), op.result_type_end(),
+                         isMemRefType);
+    });
+
+    if (failed(mlir::applyPartialConversion(getFunction(), target, patterns)))
+      signalPassFailure();
+  }
+};
+
+} // namespace
+
+/// Populates `patterns` with the conversion patterns of tensor->memref.
+//
+// TODO(tpopp): Change this to work generally with any type conversions.
+void mlir::populateShapeTypeConversionPatterns(
+    MLIRContext *context, BufferAssignmentTypeConverter *converter,
+    OwningRewritePatternList *patterns) {
+  patterns->insert<TypeConversionAssumingOpConverter>(context, converter);
+}
+
+//===----------------------------------------------------------------------===//
+// ShapeTensorToMemrefPass construction
+//===----------------------------------------------------------------------===//
+
+std::unique_ptr<Pass> mlir::createShapeTensorToMemrefPass() {
+  return std::make_unique<ShapeTensorToMemrefPass>();
+}
diff --git a/mlir/test/Dialect/Shape/shape-type-conversion.mlir b/mlir/test/Dialect/Shape/shape-type-conversion.mlir
new file mode 100644
index 0000000000000..8985a6da02510
--- /dev/null
+++ b/mlir/test/Dialect/Shape/shape-type-conversion.mlir
@@ -0,0 +1,18 @@
+// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -shape-tensor-to-memref <%s | FileCheck %s
+
+// -----
+// Check that shape.assuming returns a memref.
+//
+// CHECK-LABEL: @shape_assuming_returns_memref
+func @shape_assuming_returns_memref() {
+  %0 = shape.const_witness true
+  // CHECK: shape.assuming %{{.*}} -> (memref<2xf16>) {
+  %1 = shape.assuming %0 -> (tensor<2xf16>) {
+    %2 = "test.source"() : () -> (tensor<2xf16>)
+    shape.assuming_yield %2 : tensor<2xf16>
+  }
+  "test.sink"(%1) : (tensor<2xf16>) -> ()
+  return
+}
+
+

From 3a12ff0dac5ab4f0e1f446abe66b451c1df8dac1 Mon Sep 17 00:00:00 2001
From: Pushpinder Singh
Date: Mon, 5 Oct 2020 08:59:26 -0400
Subject: [PATCH 119/321] [OpenMP][RTL] Remove dead code

RequiresDataSharing was always 0, resulting in dead code in the device
runtime library.
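
For illustration, the change drops the always-zero flag from the SPMD entry
point. A minimal before/after sketch of the declaration (spelled as in the
comments in CGOpenMPRuntimeGPU.cpp; the device RTL header may format it
differently):

  // Before: clang always passed 0 for RequiresDataSharing.
  void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
                               int16_t RequiresOMPRuntime,
                               int16_t RequiresDataSharing);

  // After: the flag and the code guarded by it are gone.
  void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
                               int16_t RequiresOMPRuntime);
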
Reviewed By: jdoerfert, JonChesterfield Differential Revision: https://reviews.llvm.org/D88829 --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 7 +- clang/test/OpenMP/amdgcn_target_codegen.cpp | 2 +- clang/test/OpenMP/nvptx_SPMD_codegen.cpp | 126 +- .../nvptx_force_full_runtime_SPMD_codegen.cpp | 126 +- .../OpenMP/nvptx_target_parallel_codegen.cpp | 4 +- ...tx_target_parallel_num_threads_codegen.cpp | 4 +- ...vptx_target_parallel_proc_bind_codegen.cpp | 6 +- ...vptx_target_parallel_reduction_codegen.cpp | 1520 ++++++++--------- .../test/OpenMP/nvptx_target_simd_codegen.cpp | 8 +- .../OpenMP/nvptx_target_teams_codegen.cpp | 2 +- ..._teams_distribute_parallel_for_codegen.cpp | 8 +- ...s_distribute_parallel_for_simd_codegen.cpp | 8 +- ...x_target_teams_distribute_simd_codegen.cpp | 8 +- .../deviceRTLs/common/omptarget.h | 42 +- .../deviceRTLs/common/src/omptarget.cu | 12 +- openmp/libomptarget/deviceRTLs/interface.h | 4 +- 16 files changed, 909 insertions(+), 978 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 433256313c12c..5d1856f4f5e7c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -35,7 +35,7 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_deinit, /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); + /// int16_t RequiresOMPRuntime); OMPRTL_NVPTX__kmpc_spmd_kernel_init, /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, @@ -1345,8 +1345,7 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryHeader( llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true), /*RequiresOMPRuntime=*/ - Bld.getInt16(RequiresFullRuntime ? 1 : 0), - /*RequiresDataSharing=*/Bld.getInt16(0)}; + Bld.getInt16(RequiresFullRuntime ? 
1 : 0)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); @@ -1561,7 +1560,7 @@ CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction(unsigned Function) { case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); diff --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp index 0b6f2d40ffe87..85ef69942a50d 100644 --- a/clang/test/OpenMP/amdgcn_target_codegen.cpp +++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -32,7 +32,7 @@ int test_amdgcn_target_tid_threads_simd() { // CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0) // CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32 -// CHECK-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[VAR]], i16 0, i16 0) +// CHECK-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[VAR]], i16 0) #pragma omp target simd for (int i = 0; i < N; i++) { arr[i] = 1; diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp index 5fa820fcba703..6c54818e23d53 100644 --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -21,28 +21,28 @@ int a; // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams distribute parallel for simd if(a) @@ -67,28 +67,28 @@ void foo() { for (int i = 0; i < 10; ++i) ; int a; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) 
// CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams distribute parallel for lastprivate(a) @@ -112,28 +112,28 @@ int a; #pragma omp target teams distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams @@ -175,28 +175,28 @@ int a; #pragma omp distribute parallel for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 
{{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams @@ -227,28 +227,28 @@ int a; #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target @@ -286,22 +286,22 @@ int a; #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 
1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] #pragma omp target parallel for if(a) for (int i = 0; i < 10; ++i) @@ -324,28 +324,28 @@ int a; #pragma omp target parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK-DAG: [[BAR_LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK-DAG: [[BAR_LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK-DAG: [[BAR_LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] #pragma omp target parallel if(a) @@ -376,27 +376,27 @@ int a; #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK-DAG: [[BAR_LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK-DAG: [[BAR_LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] #pragma omp target @@ -434,22 +434,22 @@ int a; #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void 
@__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK-DAG: [[FULL]] #pragma omp target #pragma omp parallel for diff --git a/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp index 395a2d48ff0f9..37e71d0dcec63 100644 --- a/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp @@ -11,13 +11,13 @@ // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) #pragma omp target teams distribute parallel for simd for (int i = 0; i < 10; ++i) ; @@ -40,13 +40,13 @@ void foo() { for (int i = 0; i < 10; ++i) ; int a; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) 
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) #pragma omp target teams distribute parallel for lastprivate(a) for (int i = 0; i < 10; ++i) a = i; @@ -68,13 +68,13 @@ int a; #pragma omp target teams distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) #pragma omp target teams #pragma omp distribute parallel for simd for (int i = 0; i < 10; ++i) @@ -103,13 +103,13 @@ int a; #pragma omp distribute parallel for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) #pragma omp target teams #pragma omp distribute parallel for for (int i = 0; i < 10; ++i) @@ -138,13 +138,13 @@ int a; #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 
1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) #pragma omp target #pragma omp teams #pragma omp distribute parallel for @@ -180,13 +180,13 @@ int a; #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) #pragma omp target parallel for for (int i = 0; i < 10; ++i) ; @@ -208,13 +208,13 @@ int a; #pragma omp target parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) #pragma omp target parallel #pragma omp for simd for (int i = 0; i < 10; ++i) @@ -243,13 +243,13 @@ int a; #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void 
@__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) #pragma omp target #pragma omp parallel #pragma omp for simd ordered @@ -285,13 +285,13 @@ int a; #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) #pragma omp target #pragma omp parallel for for (int i = 0; i < 10; ++i) diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp index de8d4e0d234c1..db3d0474c2c0f 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -61,7 +61,7 @@ int bar(int n){ // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // @@ -99,7 +99,7 @@ int bar(int n){ // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align // CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp index 93de4b6397ba1..123ade5094a28 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -53,7 +53,7 @@ int bar(int n){ // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}}) // CHECK: store i32 [[GTID]], i32* [[THREADID:%.+]], @@ -84,7 +84,7 @@ int bar(int n){ // CHECK: [[AA:%.+]] = 
load i16*, i16** [[AA_ADDR]], align // CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}}) // CHECK: store i32 [[GTID]], i32* [[THREADID:%.+]], diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp index 778b8d3300dfa..7b3de7d462d29 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp @@ -54,7 +54,7 @@ int bar(int n){ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l29}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // @@ -72,7 +72,7 @@ int bar(int n){ // CHECK: } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l33}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // @@ -90,7 +90,7 @@ int bar(int n){ // CHECK: } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l38}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index 35240086d3da0..459330d31f660 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -52,774 +52,756 @@ int bar(int n){ return a; } - // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}( - // - // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0) - // CHECK: call void @__kmpc_data_sharing_init_stack_spmd - // CHECK: br label {{%?}}[[EXECUTE:.+]] - // - // CHECK: [[EXECUTE]] - // CHECK: {{call|invoke}} void [[PFN:@.+]](i32* - // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) - // - // - // define internal void [[PFN]]( - // CHECK: store double {{[0\.e\+]+}}, double* [[E:%.+]], align - // CHECK: [[EV:%.+]] = load double, double* [[E]], align - // CHECK: [[ADD:%.+]] = fadd double [[EV]], 5 - // CHECK: store double [[ADD]], double* [[E]], align - // CHECK: [[PTR1:%.+]] = getelementptr inbounds [[RLT:.+]], [1 x i8*]* [[RL:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[E_CAST:%.+]] = bitcast double* [[E]] to i8* - // CHECK: store i8* [[E_CAST]], i8** [[PTR1]], align - // CHECK: [[ARG_RL:%.+]] = bitcast [[RLT]]* [[RL]] to i8* - // CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @{{.+}}, i32 {{.+}}, i32 1, i{{32|64}} {{4|8}}, i8* [[ARG_RL]], void (i8*, i16, i16, i16)* [[SHUFFLE_REDUCE_FN:@.+]], void (i8*, i32)* [[WARP_COPY_FN:@.+]]) - // CHECK: [[CMP:%.+]] = icmp eq i32 [[RET]], 1 - // CHECK: br i1 [[CMP]], label - - // 
CHECK: [[E_INV:%.+]] = load double, double* [[E_IN:%.+]], align - // CHECK: [[EV:%.+]] = load double, double* [[E]], align - // CHECK: [[ADD:%.+]] = fadd double [[E_INV]], [[EV]] - // CHECK: store double [[ADD]], double* [[E_IN]], align - // CHECK: call void @__kmpc_nvptx_end_reduce_nowait( - // CHECK: br label - // - // CHECK: ret - - // - // Reduction function - // CHECK: define internal void [[REDUCTION_FUNC:@.+]](i8* %0, i8* %1) - // CHECK: [[VAR_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[VAR_RHS_VOID:%.+]] = load i8*, i8** [[VAR_RHS_REF]], - // CHECK: [[VAR_RHS:%.+]] = bitcast i8* [[VAR_RHS_VOID]] to double* - // - // CHECK: [[VAR_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[VAR_LHS_VOID:%.+]] = load i8*, i8** [[VAR_LHS_REF]], - // CHECK: [[VAR_LHS:%.+]] = bitcast i8* [[VAR_LHS_VOID]] to double* - // - // CHECK: [[VAR_LHS_VAL:%.+]] = load double, double* [[VAR_LHS]], - // CHECK: [[VAR_RHS_VAL:%.+]] = load double, double* [[VAR_RHS]], - // CHECK: [[RES:%.+]] = fadd double [[VAR_LHS_VAL]], [[VAR_RHS_VAL]] - // CHECK: store double [[RES]], double* [[VAR_LHS]], - // CHECK: ret void - - // - // Shuffle and reduce function - // CHECK: define internal void [[SHUFFLE_REDUCE_FN]](i8* %0, i16 {{.*}}, i16 {{.*}}, i16 {{.*}}) - // CHECK: [[REMOTE_RED_LIST:%.+]] = alloca [[RLT]], align - // CHECK: [[REMOTE_ELT:%.+]] = alloca double - // - // CHECK: [[LANEID:%.+]] = load i16, i16* {{.+}}, align - // CHECK: [[LANEOFFSET:%.+]] = load i16, i16* {{.+}}, align - // CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align - // - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* - // - // CHECK: [[ELT_CAST:%.+]] = bitcast double* [[ELT]] to i64* - // CHECK: [[REMOTE_ELT_CAST:%.+]] = bitcast double* [[REMOTE_ELT]] to i64* - // CHECK: [[ELT_VAL:%.+]] = load i64, i64* [[ELT_CAST]], align - // CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 - // CHECK: [[REMOTE_ELT_VAL64:%.+]] = call i64 @__kmpc_shuffle_int64(i64 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]]) - // - // CHECK: store i64 [[REMOTE_ELT_VAL64]], i64* [[REMOTE_ELT_CAST]], align - // CHECK: [[REMOTE_ELT_VOID:%.+]] = bitcast double* [[REMOTE_ELT]] to i8* - // CHECK: store i8* [[REMOTE_ELT_VOID]], i8** [[REMOTE_ELT_REF]], align - // - // Condition to reduce - // CHECK: [[CONDALG0:%.+]] = icmp eq i16 [[ALGVER]], 0 - // - // CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 - // CHECK: [[COND2:%.+]] = icmp ult i16 [[LANEID]], [[LANEOFFSET]] - // CHECK: [[CONDALG1:%.+]] = and i1 [[COND1]], [[COND2]] - // - // CHECK: [[COND3:%.+]] = icmp eq i16 [[ALGVER]], 2 - // CHECK: [[COND4:%.+]] = and i16 [[LANEID]], 1 - // CHECK: [[COND5:%.+]] = icmp eq i16 [[COND4]], 0 - // CHECK: [[COND6:%.+]] = and i1 [[COND3]], [[COND5]] - // CHECK: [[COND7:%.+]] = icmp sgt i16 [[LANEOFFSET]], 0 - // CHECK: [[CONDALG2:%.+]] = and i1 [[COND6]], [[COND7]] - // - // CHECK: [[COND8:%.+]] = or i1 [[CONDALG0]], [[CONDALG1]] - // CHECK: [[SHOULD_REDUCE:%.+]] = or i1 [[COND8]], [[CONDALG2]] - // CHECK: br i1 [[SHOULD_REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label 
{{%?}}[[REDUCE_ELSE:.+]] - // - // CHECK: [[DO_REDUCE]] - // CHECK: [[RED_LIST1_VOID:%.+]] = bitcast [[RLT]]* [[RED_LIST]] to i8* - // CHECK: [[RED_LIST2_VOID:%.+]] = bitcast [[RLT]]* [[REMOTE_RED_LIST]] to i8* - // CHECK: call void [[REDUCTION_FUNC]](i8* [[RED_LIST1_VOID]], i8* [[RED_LIST2_VOID]]) - // CHECK: br label {{%?}}[[REDUCE_CONT:.+]] - // - // CHECK: [[REDUCE_ELSE]] - // CHECK: br label {{%?}}[[REDUCE_CONT]] - // - // CHECK: [[REDUCE_CONT]] - // Now check if we should just copy over the remote reduction list - // CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 - // CHECK: [[COND2:%.+]] = icmp uge i16 [[LANEID]], [[LANEOFFSET]] - // CHECK: [[SHOULD_COPY:%.+]] = and i1 [[COND1]], [[COND2]] - // CHECK: br i1 [[SHOULD_COPY]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] - // - // CHECK: [[DO_COPY]] - // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double* - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* - // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align - // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align - // CHECK: br label {{%?}}[[COPY_CONT:.+]] - // - // CHECK: [[COPY_ELSE]] - // CHECK: br label {{%?}}[[COPY_CONT]] - // - // CHECK: [[COPY_CONT]] - // CHECK: void - - // - // Inter warp copy function - // CHECK: define internal void [[WARP_COPY_FN]](i8* %0, i32 %1) - // CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31 - // CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5 - // CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]* - // CHECK: store i32 0, i32* [[CNT_ADDR:%.+]], - // CHECK: br label - // CHECK: [[CNT:%.+]] = load i32, i32* [[CNT_ADDR]], - // CHECK: [[DONE_COPY:%.+]] = icmp ult i32 [[CNT]], 2 - // CHECK: br i1 [[DONE_COPY]], label - // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ - // CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 - // CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] - // - // [[DO_COPY]] - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[BASE_ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* - // CHECK: [[ELT:%.+]] = getelementptr i32, i32* [[BASE_ELT]], i32 [[CNT]] - // - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] - // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], - // CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], - // CHECK: br label {{%?}}[[COPY_CONT:.+]] - // - // CHECK: [[COPY_ELSE]] - // CHECK: br label {{%?}}[[COPY_CONT]] - // - // Barrier after copy to shared memory storage medium. - // CHECK: [[COPY_CONT]] - // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ - // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* - // - // Read into warp 0. 
- // CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] - // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] - // - // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT_BASE:%.+]] = bitcast i8* [[ELT_VOID]] to i32* - // CHECK: [[ELT:%.+]] = getelementptr i32, i32* [[ELT_BASE]], i32 [[CNT]] - // CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i32, i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], - // CHECK: store i32 [[MEDIUM_ELT_VAL]], i32* [[ELT]], - // CHECK: br label {{%?}}[[READ_CONT:.+]] - // - // CHECK: [[READ_ELSE]] - // CHECK: br label {{%?}}[[READ_CONT]] - // - // CHECK: [[READ_CONT]] - // CHECK: [[NEXT:%.+]] = add nsw i32 [[CNT]], 1 - // CHECK: store i32 [[NEXT]], i32* [[CNT_ADDR]], - // CHECK: br label - // CHECK: ret - - - - - - - - - - - // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}( - // - // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0) - // CHECK: call void @__kmpc_data_sharing_init_stack_spmd - // CHECK: br label {{%?}}[[EXECUTE:.+]] - // - // CHECK: [[EXECUTE]] - // CHECK: {{call|invoke}} void [[PFN1:@.+]](i32* - // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) - // - // - // define internal void [[PFN1]]( - // CHECK: store float {{1\.[0e\+]+}}, float* [[D:%.+]], align - // CHECK: [[C_VAL:%.+]] = load i8, i8* [[C:%.+]], align - // CHECK: [[CONV:%.+]] = sext i8 [[C_VAL]] to i32 - // CHECK: [[XOR:%.+]] = xor i32 [[CONV]], 2 - // CHECK: [[TRUNC:%.+]] = trunc i32 [[XOR]] to i8 - // CHECK: store i8 [[TRUNC]], i8* [[C]], align - // CHECK: [[DV:%.+]] = load float, float* [[D]], align - // CHECK: [[MUL:%.+]] = fmul float [[DV]], {{[0-9e\.\+]+}} - // CHECK: store float [[MUL]], float* [[D]], align - // CHECK: [[PTR1:%.+]] = getelementptr inbounds [[RLT:.+]], [2 x i8*]* [[RL:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: store i8* [[C]], i8** [[PTR1]], align - // CHECK: [[PTR2:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RL]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[D_CAST:%.+]] = bitcast float* [[D]] to i8* - // CHECK: store i8* [[D_CAST]], i8** [[PTR2]], align - // CHECK: [[ARG_RL:%.+]] = bitcast [[RLT]]* [[RL]] to i8* - // CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @{{.+}}, i32 {{.+}}, i32 2, i{{32|64}} {{8|16}}, i8* [[ARG_RL]], void (i8*, i16, i16, i16)* [[SHUFFLE_REDUCE_FN:@.+]], void (i8*, i32)* [[WARP_COPY_FN:@.+]]) - // CHECK: [[CMP:%.+]] = icmp eq i32 [[RET]], 1 - // CHECK: br i1 [[CMP]], label - // CHECK: [[C_INV8:%.+]] = load i8, i8* [[C_IN:%.+]], align - // CHECK: [[C_INV:%.+]] = sext i8 [[C_INV8]] to i32 - // CHECK: [[CV8:%.+]] = load i8, i8* [[C]], align - // CHECK: [[CV:%.+]] = sext i8 [[CV8]] to i32 - // CHECK: [[XOR:%.+]] = xor i32 [[C_INV]], [[CV]] - // CHECK: [[TRUNC:%.+]] = trunc i32 [[XOR]] to i8 - // CHECK: store i8 [[TRUNC]], i8* [[C_IN]], align - // CHECK: [[D_INV:%.+]] = load float, float* [[D_IN:%.+]], align - // CHECK: [[DV:%.+]] = load float, float* [[D]], align - // CHECK: [[MUL:%.+]] = fmul float [[D_INV]], [[DV]] - // CHECK: store float [[MUL]], float* [[D_IN]], align - // CHECK: call void @__kmpc_nvptx_end_reduce_nowait( - // CHECK: br label - // - // CHECK: ret - - 
// - // Reduction function - // CHECK: define internal void [[REDUCTION_FUNC:@.+]](i8* %0, i8* %1) - // CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[VAR1_RHS:%.+]] = load i8*, i8** [[VAR1_RHS_REF]], - // - // CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[VAR1_LHS:%.+]] = load i8*, i8** [[VAR1_LHS_REF]], - // - // CHECK: [[VAR2_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[VAR2_RHS_VOID:%.+]] = load i8*, i8** [[VAR2_RHS_REF]], - // CHECK: [[VAR2_RHS:%.+]] = bitcast i8* [[VAR2_RHS_VOID]] to float* - // - // CHECK: [[VAR2_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[VAR2_LHS_VOID:%.+]] = load i8*, i8** [[VAR2_LHS_REF]], - // CHECK: [[VAR2_LHS:%.+]] = bitcast i8* [[VAR2_LHS_VOID]] to float* - // - // CHECK: [[VAR1_LHS_VAL8:%.+]] = load i8, i8* [[VAR1_LHS]], - // CHECK: [[VAR1_LHS_VAL:%.+]] = sext i8 [[VAR1_LHS_VAL8]] to i32 - // CHECK: [[VAR1_RHS_VAL8:%.+]] = load i8, i8* [[VAR1_RHS]], - // CHECK: [[VAR1_RHS_VAL:%.+]] = sext i8 [[VAR1_RHS_VAL8]] to i32 - // CHECK: [[XOR:%.+]] = xor i32 [[VAR1_LHS_VAL]], [[VAR1_RHS_VAL]] - // CHECK: [[RES:%.+]] = trunc i32 [[XOR]] to i8 - // CHECK: store i8 [[RES]], i8* [[VAR1_LHS]], - // - // CHECK: [[VAR2_LHS_VAL:%.+]] = load float, float* [[VAR2_LHS]], - // CHECK: [[VAR2_RHS_VAL:%.+]] = load float, float* [[VAR2_RHS]], - // CHECK: [[RES:%.+]] = fmul float [[VAR2_LHS_VAL]], [[VAR2_RHS_VAL]] - // CHECK: store float [[RES]], float* [[VAR2_LHS]], - // CHECK: ret void - - // - // Shuffle and reduce function - // CHECK: define internal void [[SHUFFLE_REDUCE_FN]](i8* %0, i16 {{.*}}, i16 {{.*}}, i16 {{.*}}) - // CHECK: [[REMOTE_RED_LIST:%.+]] = alloca [[RLT]], align - // CHECK: [[REMOTE_ELT1:%.+]] = alloca i8 - // CHECK: [[REMOTE_ELT2:%.+]] = alloca float - // - // CHECK: [[LANEID:%.+]] = load i16, i16* {{.+}}, align - // CHECK: [[LANEOFFSET:%.+]] = load i16, i16* {{.+}}, align - // CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align - // - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align - // - // CHECK: [[ELT_CAST:%.+]] = sext i8 [[ELT_VAL]] to i32 - // CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 - // CHECK: [[REMOTE_ELT1_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_CAST]], i16 [[LANEOFFSET]], i16 [[WS]]) - // CHECK: [[REMOTE_ELT1_VAL:%.+]] = trunc i32 [[REMOTE_ELT1_VAL32]] to i8 - // - // CHECK: store i8 [[REMOTE_ELT1_VAL]], i8* [[REMOTE_ELT1]], align - // CHECK: store i8* [[REMOTE_ELT1]], i8** [[REMOTE_ELT_REF]], align - // - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* - // - // CHECK: [[ELT_CAST:%.+]] = bitcast float* [[ELT]] to i32* - // CHECK: 
[[REMOTE_ELT2_CAST:%.+]] = bitcast float* [[REMOTE_ELT2]] to i32* - // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT_CAST]], align - // CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 - // CHECK: [[REMOTE_ELT2_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]]) - // - // CHECK: store i32 [[REMOTE_ELT2_VAL32]], i32* [[REMOTE_ELT2_CAST]], align - // CHECK: [[REMOTE_ELT2C:%.+]] = bitcast float* [[REMOTE_ELT2]] to i8* - // CHECK: store i8* [[REMOTE_ELT2C]], i8** [[REMOTE_ELT_REF]], align - // - // Condition to reduce - // CHECK: [[CONDALG0:%.+]] = icmp eq i16 [[ALGVER]], 0 - // - // CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 - // CHECK: [[COND2:%.+]] = icmp ult i16 [[LANEID]], [[LANEOFFSET]] - // CHECK: [[CONDALG1:%.+]] = and i1 [[COND1]], [[COND2]] - // - // CHECK: [[COND3:%.+]] = icmp eq i16 [[ALGVER]], 2 - // CHECK: [[COND4:%.+]] = and i16 [[LANEID]], 1 - // CHECK: [[COND5:%.+]] = icmp eq i16 [[COND4]], 0 - // CHECK: [[COND6:%.+]] = and i1 [[COND3]], [[COND5]] - // CHECK: [[COND7:%.+]] = icmp sgt i16 [[LANEOFFSET]], 0 - // CHECK: [[CONDALG2:%.+]] = and i1 [[COND6]], [[COND7]] - // - // CHECK: [[COND8:%.+]] = or i1 [[CONDALG0]], [[CONDALG1]] - // CHECK: [[SHOULD_REDUCE:%.+]] = or i1 [[COND8]], [[CONDALG2]] - // CHECK: br i1 [[SHOULD_REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] - // - // CHECK: [[DO_REDUCE]] - // CHECK: [[RED_LIST1_VOID:%.+]] = bitcast [[RLT]]* [[RED_LIST]] to i8* - // CHECK: [[RED_LIST2_VOID:%.+]] = bitcast [[RLT]]* [[REMOTE_RED_LIST]] to i8* - // CHECK: call void [[REDUCTION_FUNC]](i8* [[RED_LIST1_VOID]], i8* [[RED_LIST2_VOID]]) - // CHECK: br label {{%?}}[[REDUCE_CONT:.+]] - // - // CHECK: [[REDUCE_ELSE]] - // CHECK: br label {{%?}}[[REDUCE_CONT]] - // - // CHECK: [[REDUCE_CONT]] - // Now check if we should just copy over the remote reduction list - // CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 - // CHECK: [[COND2:%.+]] = icmp uge i16 [[LANEID]], [[LANEOFFSET]] - // CHECK: [[SHOULD_COPY:%.+]] = and i1 [[COND1]], [[COND2]] - // CHECK: br i1 [[SHOULD_COPY]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] - // - // CHECK: [[DO_COPY]] - // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i8, i8* [[REMOTE_ELT_VOID]], align - // CHECK: store i8 [[REMOTE_ELT_VAL]], i8* [[ELT_VOID]], align - // - // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float* - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* - // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align - // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align - // CHECK: br label {{%?}}[[COPY_CONT:.+]] - // - // CHECK: [[COPY_ELSE]] - // CHECK: br label {{%?}}[[COPY_CONT]] - // - // CHECK: 
[[COPY_CONT]] - // CHECK: void - - // - // Inter warp copy function - // CHECK: define internal void [[WARP_COPY_FN]](i8* %0, i32 %1) - // CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31 - // CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5 - // CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]* - // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ - // CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 - // CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] - // - // [[DO_COPY]] - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // - // CHECK: [[MEDIUM_ELT64:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] - // CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT64]] to i8 addrspace([[SHARED_ADDRSPACE]])* - // CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align - // CHECK: store volatile i8 [[ELT_VAL]], i8 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align - // CHECK: br label {{%?}}[[COPY_CONT:.+]] - // - // CHECK: [[COPY_ELSE]] - // CHECK: br label {{%?}}[[COPY_CONT]] - // - // Barrier after copy to shared memory storage medium. - // CHECK: [[COPY_CONT]] - // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ - // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* - // - // Read into warp 0. - // CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] - // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] - // - // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] - // CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i8 addrspace([[SHARED_ADDRSPACE]])* - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i8, i8 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align - // CHECK: store i8 [[MEDIUM_ELT_VAL]], i8* [[ELT_VOID]], align - // CHECK: br label {{%?}}[[READ_CONT:.+]] - // - // CHECK: [[READ_ELSE]] - // CHECK: br label {{%?}}[[READ_CONT]] - // - // CHECK: [[READ_CONT]] - // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ - // CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 - // CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] - // - // [[DO_COPY]] - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* - // - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] - // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align - // CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align - // CHECK: br label {{%?}}[[COPY_CONT:.+]] - // - // CHECK: [[COPY_ELSE]] - // CHECK: br label {{%?}}[[COPY_CONT]] - // - // Barrier after copy to shared memory storage medium. 
- // CHECK: [[COPY_CONT]] - // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ - // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* - // - // Read into warp 0. - // CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] - // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] - // - // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* - // CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i32, i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align - // CHECK: store i32 [[MEDIUM_ELT_VAL]], i32* [[ELT]], align - // CHECK: br label {{%?}}[[READ_CONT:.+]] - // - // CHECK: [[READ_ELSE]] - // CHECK: br label {{%?}}[[READ_CONT]] - // - // CHECK: [[READ_CONT]] - // CHECK: ret - - - - - - - - - - - // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}( - // - // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0) - // CHECK: call void @__kmpc_data_sharing_init_stack_spmd - // CHECK: br label {{%?}}[[EXECUTE:.+]] - // - // CHECK: [[EXECUTE]] - // CHECK: {{call|invoke}} void [[PFN2:@.+]](i32* - // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) - // - // - // define internal void [[PFN2]]( - // CHECK: store i32 0, i32* [[A:%.+]], align - // CHECK: store i16 -32768, i16* [[B:%.+]], align - // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A:%.+]], align - // CHECK: [[OR:%.+]] = or i32 [[A_VAL]], 1 - // CHECK: store i32 [[OR]], i32* [[A]], align - // CHECK: [[BV16:%.+]] = load i16, i16* [[B]], align - // CHECK: [[BV:%.+]] = sext i16 [[BV16]] to i32 - // CHECK: [[CMP:%.+]] = icmp sgt i32 99, [[BV]] - // CHECK: br i1 [[CMP]], label {{%?}}[[DO_MAX:.+]], label {{%?}}[[MAX_ELSE:.+]] - // - // CHECK: [[DO_MAX]] - // CHECK: br label {{%?}}[[MAX_CONT:.+]] - // - // CHECK: [[MAX_ELSE]] - // CHECK: [[BV:%.+]] = load i16, i16* [[B]], align - // CHECK: [[MAX:%.+]] = sext i16 [[BV]] to i32 - // CHECK: br label {{%?}}[[MAX_CONT]] - // - // CHECK: [[MAX_CONT]] - // CHECK: [[B_LVALUE:%.+]] = phi i32 [ 99, %[[DO_MAX]] ], [ [[MAX]], %[[MAX_ELSE]] ] - // CHECK: [[TRUNC:%.+]] = trunc i32 [[B_LVALUE]] to i16 - // CHECK: store i16 [[TRUNC]], i16* [[B]], align - // CHECK: [[PTR1:%.+]] = getelementptr inbounds [[RLT:.+]], [2 x i8*]* [[RL:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[A_CAST:%.+]] = bitcast i32* [[A]] to i8* - // CHECK: store i8* [[A_CAST]], i8** [[PTR1]], align - // CHECK: [[PTR2:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RL]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[B_CAST:%.+]] = bitcast i16* [[B]] to i8* - // CHECK: store i8* [[B_CAST]], i8** [[PTR2]], align - // CHECK: [[ARG_RL:%.+]] = bitcast [[RLT]]* [[RL]] to i8* - // CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @{{.+}}, i32 {{.+}}, i32 2, i{{32|64}} {{8|16}}, i8* [[ARG_RL]], void (i8*, i16, i16, i16)* [[SHUFFLE_REDUCE_FN:@.+]], void (i8*, i32)* [[WARP_COPY_FN:@.+]]) - // CHECK: [[CMP:%.+]] = icmp eq i32 [[RET]], 1 - // CHECK: br i1 [[CMP]], label - - // CHECK: [[A_INV:%.+]] = load i32, i32* [[A_IN:%.+]], align - // CHECK: [[AV:%.+]] = load i32, i32* [[A]], align - // CHECK: [[OR:%.+]] = or i32 [[A_INV]], [[AV]] - // CHECK: store i32 [[OR]], i32* [[A_IN]], 
align - // CHECK: [[B_INV16:%.+]] = load i16, i16* [[B_IN:%.+]], align - // CHECK: [[B_INV:%.+]] = sext i16 [[B_INV16]] to i32 - // CHECK: [[BV16:%.+]] = load i16, i16* [[B]], align - // CHECK: [[BV:%.+]] = sext i16 [[BV16]] to i32 - // CHECK: [[CMP:%.+]] = icmp sgt i32 [[B_INV]], [[BV]] - // CHECK: br i1 [[CMP]], label {{%?}}[[DO_MAX:.+]], label {{%?}}[[MAX_ELSE:.+]] - // - // CHECK: [[DO_MAX]] - // CHECK: [[MAX1:%.+]] = load i16, i16* [[B_IN]], align - // CHECK: br label {{%?}}[[MAX_CONT:.+]] - // - // CHECK: [[MAX_ELSE]] - // CHECK: [[MAX2:%.+]] = load i16, i16* [[B]], align - // CHECK: br label {{%?}}[[MAX_CONT]] - // - // CHECK: [[MAX_CONT]] - // CHECK: [[B_MAX:%.+]] = phi i16 [ [[MAX1]], %[[DO_MAX]] ], [ [[MAX2]], %[[MAX_ELSE]] ] - // CHECK: store i16 [[B_MAX]], i16* [[B_IN]], align - // CHECK: call void @__kmpc_nvptx_end_reduce_nowait( - // CHECK: br label - // - // CHECK: ret - - // - // Reduction function - // CHECK: define internal void [[REDUCTION_FUNC:@.+]](i8* %0, i8* %1) - // CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[VAR1_RHS_VOID:%.+]] = load i8*, i8** [[VAR1_RHS_REF]], - // CHECK: [[VAR1_RHS:%.+]] = bitcast i8* [[VAR1_RHS_VOID]] to i32* - // - // CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[VAR1_LHS_VOID:%.+]] = load i8*, i8** [[VAR1_LHS_REF]], - // CHECK: [[VAR1_LHS:%.+]] = bitcast i8* [[VAR1_LHS_VOID]] to i32* - // - // CHECK: [[VAR2_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[VAR2_RHS_VOID:%.+]] = load i8*, i8** [[VAR2_RHS_REF]], - // CHECK: [[VAR2_RHS:%.+]] = bitcast i8* [[VAR2_RHS_VOID]] to i16* - // - // CHECK: [[VAR2_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[VAR2_LHS_VOID:%.+]] = load i8*, i8** [[VAR2_LHS_REF]], - // CHECK: [[VAR2_LHS:%.+]] = bitcast i8* [[VAR2_LHS_VOID]] to i16* - // - // CHECK: [[VAR1_LHS_VAL:%.+]] = load i32, i32* [[VAR1_LHS]], - // CHECK: [[VAR1_RHS_VAL:%.+]] = load i32, i32* [[VAR1_RHS]], - // CHECK: [[OR:%.+]] = or i32 [[VAR1_LHS_VAL]], [[VAR1_RHS_VAL]] - // CHECK: store i32 [[OR]], i32* [[VAR1_LHS]], - // - // CHECK: [[VAR2_LHS_VAL16:%.+]] = load i16, i16* [[VAR2_LHS]], - // CHECK: [[VAR2_LHS_VAL:%.+]] = sext i16 [[VAR2_LHS_VAL16]] to i32 - // CHECK: [[VAR2_RHS_VAL16:%.+]] = load i16, i16* [[VAR2_RHS]], - // CHECK: [[VAR2_RHS_VAL:%.+]] = sext i16 [[VAR2_RHS_VAL16]] to i32 - // - // CHECK: [[CMP:%.+]] = icmp sgt i32 [[VAR2_LHS_VAL]], [[VAR2_RHS_VAL]] - // CHECK: br i1 [[CMP]], label {{%?}}[[DO_MAX:.+]], label {{%?}}[[MAX_ELSE:.+]] - // - // CHECK: [[DO_MAX]] - // CHECK: [[MAX1:%.+]] = load i16, i16* [[VAR2_LHS]], align - // CHECK: br label {{%?}}[[MAX_CONT:.+]] - // - // CHECK: [[MAX_ELSE]] - // CHECK: [[MAX2:%.+]] = load i16, i16* [[VAR2_RHS]], align - // CHECK: br label {{%?}}[[MAX_CONT]] - // - // CHECK: [[MAX_CONT]] - // CHECK: [[MAXV:%.+]] = phi i16 [ [[MAX1]], %[[DO_MAX]] ], [ [[MAX2]], %[[MAX_ELSE]] ] - // CHECK: store i16 [[MAXV]], i16* [[VAR2_LHS]], - // CHECK: ret void - - // - // Shuffle and reduce function - // CHECK: define internal void [[SHUFFLE_REDUCE_FN]](i8* %0, i16 {{.*}}, i16 {{.*}}, i16 {{.*}}) - // CHECK: [[REMOTE_RED_LIST:%.+]] = alloca [[RLT]], align - // CHECK: [[REMOTE_ELT1:%.+]] = alloca i32 - // CHECK: [[REMOTE_ELT2:%.+]] = alloca i16 - // - // CHECK: [[LANEID:%.+]] = load i16, i16* {{.+}}, align 
- // CHECK: [[LANEOFFSET:%.+]] = load i16, i16* {{.+}}, align - // CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align - // - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* - // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align - // - // CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 - // CHECK: [[REMOTE_ELT1_VAL:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]]) - // - // CHECK: store i32 [[REMOTE_ELT1_VAL]], i32* [[REMOTE_ELT1]], align - // CHECK: [[REMOTE_ELT1C:%.+]] = bitcast i32* [[REMOTE_ELT1]] to i8* - // CHECK: store i8* [[REMOTE_ELT1C]], i8** [[REMOTE_ELT_REF]], align - // - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* - // CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align - // - // CHECK: [[ELT_CAST:%.+]] = sext i16 [[ELT_VAL]] to i32 - // CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 - // CHECK: [[REMOTE_ELT2_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_CAST]], i16 [[LANEOFFSET]], i16 [[WS]]) - // CHECK: [[REMOTE_ELT2_VAL:%.+]] = trunc i32 [[REMOTE_ELT2_VAL32]] to i16 - // - // CHECK: store i16 [[REMOTE_ELT2_VAL]], i16* [[REMOTE_ELT2]], align - // CHECK: [[REMOTE_ELT2C:%.+]] = bitcast i16* [[REMOTE_ELT2]] to i8* - // CHECK: store i8* [[REMOTE_ELT2C]], i8** [[REMOTE_ELT_REF]], align - // - // Condition to reduce - // CHECK: [[CONDALG0:%.+]] = icmp eq i16 [[ALGVER]], 0 - // - // CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 - // CHECK: [[COND2:%.+]] = icmp ult i16 [[LANEID]], [[LANEOFFSET]] - // CHECK: [[CONDALG1:%.+]] = and i1 [[COND1]], [[COND2]] - // - // CHECK: [[COND3:%.+]] = icmp eq i16 [[ALGVER]], 2 - // CHECK: [[COND4:%.+]] = and i16 [[LANEID]], 1 - // CHECK: [[COND5:%.+]] = icmp eq i16 [[COND4]], 0 - // CHECK: [[COND6:%.+]] = and i1 [[COND3]], [[COND5]] - // CHECK: [[COND7:%.+]] = icmp sgt i16 [[LANEOFFSET]], 0 - // CHECK: [[CONDALG2:%.+]] = and i1 [[COND6]], [[COND7]] - // - // CHECK: [[COND8:%.+]] = or i1 [[CONDALG0]], [[CONDALG1]] - // CHECK: [[SHOULD_REDUCE:%.+]] = or i1 [[COND8]], [[CONDALG2]] - // CHECK: br i1 [[SHOULD_REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] - // - // CHECK: [[DO_REDUCE]] - // CHECK: [[RED_LIST1_VOID:%.+]] = bitcast [[RLT]]* [[RED_LIST]] to i8* - // CHECK: [[RED_LIST2_VOID:%.+]] = bitcast [[RLT]]* [[REMOTE_RED_LIST]] to i8* - // CHECK: call void [[REDUCTION_FUNC]](i8* [[RED_LIST1_VOID]], i8* [[RED_LIST2_VOID]]) - // CHECK: br label {{%?}}[[REDUCE_CONT:.+]] - // - // CHECK: [[REDUCE_ELSE]] - // CHECK: br label {{%?}}[[REDUCE_CONT]] - // - // CHECK: [[REDUCE_CONT]] - // Now check if we should just copy over the remote reduction list - // CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 - // CHECK: [[COND2:%.+]] = icmp uge i16 [[LANEID]], [[LANEOFFSET]] - // CHECK: [[SHOULD_COPY:%.+]] = and i1 [[COND1]], 
[[COND2]] - // CHECK: br i1 [[SHOULD_COPY]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] - // - // CHECK: [[DO_COPY]] - // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32* - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* - // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align - // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align - // - // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16* - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* - // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align - // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align - // CHECK: br label {{%?}}[[COPY_CONT:.+]] - // - // CHECK: [[COPY_ELSE]] - // CHECK: br label {{%?}}[[COPY_CONT]] - // - // CHECK: [[COPY_CONT]] - // CHECK: void - - // - // Inter warp copy function - // CHECK: define internal void [[WARP_COPY_FN]](i8* %0, i32 %1) - // CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31 - // CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5 - // CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]* - // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ - // CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 - // CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] - // - // [[DO_COPY]] - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* - // - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] - // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align - // CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align - // CHECK: br label {{%?}}[[COPY_CONT:.+]] - // - // CHECK: [[COPY_ELSE]] - // CHECK: br label {{%?}}[[COPY_CONT]] - // - // Barrier after copy to shared memory storage medium. - // CHECK: [[COPY_CONT]] - // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ - // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* - // - // Read into warp 0. 
- // CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] - // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] - // - // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* - // CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i32, i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align - // CHECK: store i32 [[MEDIUM_ELT_VAL]], i32* [[ELT]], align - // CHECK: br label {{%?}}[[READ_CONT:.+]] - // - // CHECK: [[READ_ELSE]] - // CHECK: br label {{%?}}[[READ_CONT]] - // - // CHECK: [[READ_CONT]] - // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ - // CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 - // CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] - // - // [[DO_COPY]] - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* - // - // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] - // CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i16 addrspace([[SHARED_ADDRSPACE]])* - // CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align - // CHECK: store volatile i16 [[ELT_VAL]], i16 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align - // CHECK: br label {{%?}}[[COPY_CONT:.+]] - // - // CHECK: [[COPY_ELSE]] - // CHECK: br label {{%?}}[[COPY_CONT]] - // - // Barrier after copy to shared memory storage medium. - // CHECK: [[COPY_CONT]] - // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ - // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* - // - // Read into warp 0. 
- // CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] - // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] - // - // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] - // CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i16 addrspace([[SHARED_ADDRSPACE]])* - // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 1 - // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* - // CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i16, i16 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align - // CHECK: store i16 [[MEDIUM_ELT_VAL]], i16* [[ELT]], align - // CHECK: br label {{%?}}[[READ_CONT:.+]] - // - // CHECK: [[READ_ELSE]] - // CHECK: br label {{%?}}[[READ_CONT]] - // - // CHECK: [[READ_CONT]] - // CHECK: ret +// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}( +// +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK: br label {{%?}}[[EXECUTE:.+]] +// +// CHECK: [[EXECUTE]] +// CHECK: {{call|invoke}} void [[PFN:@.+]](i32* +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// +// +// define internal void [[PFN]]( +// CHECK: store double {{[0\.e\+]+}}, double* [[E:%.+]], align +// CHECK: [[EV:%.+]] = load double, double* [[E]], align +// CHECK: [[ADD:%.+]] = fadd double [[EV]], 5 +// CHECK: store double [[ADD]], double* [[E]], align +// CHECK: [[PTR1:%.+]] = getelementptr inbounds [[RLT:.+]], [1 x i8*]* [[RL:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[E_CAST:%.+]] = bitcast double* [[E]] to i8* +// CHECK: store i8* [[E_CAST]], i8** [[PTR1]], align +// CHECK: [[ARG_RL:%.+]] = bitcast [[RLT]]* [[RL]] to i8* +// CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @{{.+}}, i32 {{.+}}, i32 1, i{{32|64}} {{4|8}}, i8* [[ARG_RL]], void (i8*, i16, i16, i16)* [[SHUFFLE_REDUCE_FN:@.+]], void (i8*, i32)* [[WARP_COPY_FN:@.+]]) +// CHECK: [[CMP:%.+]] = icmp eq i32 [[RET]], 1 +// CHECK: br i1 [[CMP]], label + +// CHECK: [[E_INV:%.+]] = load double, double* [[E_IN:%.+]], align +// CHECK: [[EV:%.+]] = load double, double* [[E]], align +// CHECK: [[ADD:%.+]] = fadd double [[E_INV]], [[EV]] +// CHECK: store double [[ADD]], double* [[E_IN]], align +// CHECK: call void @__kmpc_nvptx_end_reduce_nowait( +// CHECK: br label +// +// CHECK: ret + +// +// Reduction function +// CHECK: define internal void [[REDUCTION_FUNC:@.+]](i8* %0, i8* %1) +// CHECK: [[VAR_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[VAR_RHS_VOID:%.+]] = load i8*, i8** [[VAR_RHS_REF]], +// CHECK: [[VAR_RHS:%.+]] = bitcast i8* [[VAR_RHS_VOID]] to double* +// +// CHECK: [[VAR_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[VAR_LHS_VOID:%.+]] = load i8*, i8** [[VAR_LHS_REF]], +// CHECK: [[VAR_LHS:%.+]] = bitcast i8* [[VAR_LHS_VOID]] to double* +// +// CHECK: [[VAR_LHS_VAL:%.+]] = load double, double* [[VAR_LHS]], +// CHECK: [[VAR_RHS_VAL:%.+]] = load double, double* [[VAR_RHS]], +// CHECK: [[RES:%.+]] = fadd double [[VAR_LHS_VAL]], [[VAR_RHS_VAL]] +// CHECK: store double [[RES]], double* [[VAR_LHS]], +// CHECK: ret 
void + +// +// Shuffle and reduce function +// CHECK: define internal void [[SHUFFLE_REDUCE_FN]](i8* %0, i16 {{.*}}, i16 {{.*}}, i16 {{.*}}) +// CHECK: [[REMOTE_RED_LIST:%.+]] = alloca [[RLT]], align +// CHECK: [[REMOTE_ELT:%.+]] = alloca double +// +// CHECK: [[LANEID:%.+]] = load i16, i16* {{.+}}, align +// CHECK: [[LANEOFFSET:%.+]] = load i16, i16* {{.+}}, align +// CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align +// +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* +// +// CHECK: [[ELT_CAST:%.+]] = bitcast double* [[ELT]] to i64* +// CHECK: [[REMOTE_ELT_CAST:%.+]] = bitcast double* [[REMOTE_ELT]] to i64* +// CHECK: [[ELT_VAL:%.+]] = load i64, i64* [[ELT_CAST]], align +// CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 +// CHECK: [[REMOTE_ELT_VAL64:%.+]] = call i64 @__kmpc_shuffle_int64(i64 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]]) +// +// CHECK: store i64 [[REMOTE_ELT_VAL64]], i64* [[REMOTE_ELT_CAST]], align +// CHECK: [[REMOTE_ELT_VOID:%.+]] = bitcast double* [[REMOTE_ELT]] to i8* +// CHECK: store i8* [[REMOTE_ELT_VOID]], i8** [[REMOTE_ELT_REF]], align +// +// Condition to reduce +// CHECK: [[CONDALG0:%.+]] = icmp eq i16 [[ALGVER]], 0 +// +// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 +// CHECK: [[COND2:%.+]] = icmp ult i16 [[LANEID]], [[LANEOFFSET]] +// CHECK: [[CONDALG1:%.+]] = and i1 [[COND1]], [[COND2]] +// +// CHECK: [[COND3:%.+]] = icmp eq i16 [[ALGVER]], 2 +// CHECK: [[COND4:%.+]] = and i16 [[LANEID]], 1 +// CHECK: [[COND5:%.+]] = icmp eq i16 [[COND4]], 0 +// CHECK: [[COND6:%.+]] = and i1 [[COND3]], [[COND5]] +// CHECK: [[COND7:%.+]] = icmp sgt i16 [[LANEOFFSET]], 0 +// CHECK: [[CONDALG2:%.+]] = and i1 [[COND6]], [[COND7]] +// +// CHECK: [[COND8:%.+]] = or i1 [[CONDALG0]], [[CONDALG1]] +// CHECK: [[SHOULD_REDUCE:%.+]] = or i1 [[COND8]], [[CONDALG2]] +// CHECK: br i1 [[SHOULD_REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] +// +// CHECK: [[DO_REDUCE]] +// CHECK: [[RED_LIST1_VOID:%.+]] = bitcast [[RLT]]* [[RED_LIST]] to i8* +// CHECK: [[RED_LIST2_VOID:%.+]] = bitcast [[RLT]]* [[REMOTE_RED_LIST]] to i8* +// CHECK: call void [[REDUCTION_FUNC]](i8* [[RED_LIST1_VOID]], i8* [[RED_LIST2_VOID]]) +// CHECK: br label {{%?}}[[REDUCE_CONT:.+]] +// +// CHECK: [[REDUCE_ELSE]] +// CHECK: br label {{%?}}[[REDUCE_CONT]] +// +// CHECK: [[REDUCE_CONT]] +// Now check if we should just copy over the remote reduction list +// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 +// CHECK: [[COND2:%.+]] = icmp uge i16 [[LANEID]], [[LANEOFFSET]] +// CHECK: [[SHOULD_COPY:%.+]] = and i1 [[COND1]], [[COND2]] +// CHECK: br i1 [[SHOULD_COPY]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] +// +// CHECK: [[DO_COPY]] +// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double* +// CHECK: [[ELT:%.+]] = bitcast i8* 
[[ELT_VOID]] to double* +// CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align +// CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align +// CHECK: br label {{%?}}[[COPY_CONT:.+]] +// +// CHECK: [[COPY_ELSE]] +// CHECK: br label {{%?}}[[COPY_CONT]] +// +// CHECK: [[COPY_CONT]] +// CHECK: void + +// +// Inter warp copy function +// CHECK: define internal void [[WARP_COPY_FN]](i8* %0, i32 %1) +// CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31 +// CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5 +// CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]* +// CHECK: store i32 0, i32* [[CNT_ADDR:%.+]], +// CHECK: br label +// CHECK: [[CNT:%.+]] = load i32, i32* [[CNT_ADDR]], +// CHECK: [[DONE_COPY:%.+]] = icmp ult i32 [[CNT]], 2 +// CHECK: br i1 [[DONE_COPY]], label +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ +// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 +// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] +// +// [[DO_COPY]] +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[BASE_ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* +// CHECK: [[ELT:%.+]] = getelementptr i32, i32* [[BASE_ELT]], i32 [[CNT]] +// +// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] +// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], +// CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], +// CHECK: br label {{%?}}[[COPY_CONT:.+]] +// +// CHECK: [[COPY_ELSE]] +// CHECK: br label {{%?}}[[COPY_CONT]] +// +// Barrier after copy to shared memory storage medium. +// CHECK: [[COPY_CONT]] +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ +// CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* +// +// Read into warp 0. 
+// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] +// CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] +// +// CHECK: [[DO_READ]] +// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[ELT_BASE:%.+]] = bitcast i8* [[ELT_VOID]] to i32* +// CHECK: [[ELT:%.+]] = getelementptr i32, i32* [[ELT_BASE]], i32 [[CNT]] +// CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i32, i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], +// CHECK: store i32 [[MEDIUM_ELT_VAL]], i32* [[ELT]], +// CHECK: br label {{%?}}[[READ_CONT:.+]] +// +// CHECK: [[READ_ELSE]] +// CHECK: br label {{%?}}[[READ_CONT]] +// +// CHECK: [[READ_CONT]] +// CHECK: [[NEXT:%.+]] = add nsw i32 [[CNT]], 1 +// CHECK: store i32 [[NEXT]], i32* [[CNT_ADDR]], +// CHECK: br label +// CHECK: ret + +// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}( +// +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK: br label {{%?}}[[EXECUTE:.+]] +// +// CHECK: [[EXECUTE]] +// CHECK: {{call|invoke}} void [[PFN1:@.+]](i32* +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// +// +// define internal void [[PFN1]]( +// CHECK: store float {{1\.[0e\+]+}}, float* [[D:%.+]], align +// CHECK: [[C_VAL:%.+]] = load i8, i8* [[C:%.+]], align +// CHECK: [[CONV:%.+]] = sext i8 [[C_VAL]] to i32 +// CHECK: [[XOR:%.+]] = xor i32 [[CONV]], 2 +// CHECK: [[TRUNC:%.+]] = trunc i32 [[XOR]] to i8 +// CHECK: store i8 [[TRUNC]], i8* [[C]], align +// CHECK: [[DV:%.+]] = load float, float* [[D]], align +// CHECK: [[MUL:%.+]] = fmul float [[DV]], {{[0-9e\.\+]+}} +// CHECK: store float [[MUL]], float* [[D]], align +// CHECK: [[PTR1:%.+]] = getelementptr inbounds [[RLT:.+]], [2 x i8*]* [[RL:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: store i8* [[C]], i8** [[PTR1]], align +// CHECK: [[PTR2:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RL]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[D_CAST:%.+]] = bitcast float* [[D]] to i8* +// CHECK: store i8* [[D_CAST]], i8** [[PTR2]], align +// CHECK: [[ARG_RL:%.+]] = bitcast [[RLT]]* [[RL]] to i8* +// CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @{{.+}}, i32 {{.+}}, i32 2, i{{32|64}} {{8|16}}, i8* [[ARG_RL]], void (i8*, i16, i16, i16)* [[SHUFFLE_REDUCE_FN:@.+]], void (i8*, i32)* [[WARP_COPY_FN:@.+]]) +// CHECK: [[CMP:%.+]] = icmp eq i32 [[RET]], 1 +// CHECK: br i1 [[CMP]], label +// CHECK: [[C_INV8:%.+]] = load i8, i8* [[C_IN:%.+]], align +// CHECK: [[C_INV:%.+]] = sext i8 [[C_INV8]] to i32 +// CHECK: [[CV8:%.+]] = load i8, i8* [[C]], align +// CHECK: [[CV:%.+]] = sext i8 [[CV8]] to i32 +// CHECK: [[XOR:%.+]] = xor i32 [[C_INV]], [[CV]] +// CHECK: [[TRUNC:%.+]] = trunc i32 [[XOR]] to i8 +// CHECK: store i8 [[TRUNC]], i8* [[C_IN]], align +// CHECK: [[D_INV:%.+]] = load float, float* [[D_IN:%.+]], align +// CHECK: [[DV:%.+]] = load float, float* [[D]], align +// CHECK: [[MUL:%.+]] = fmul float [[D_INV]], [[DV]] +// CHECK: store float [[MUL]], float* [[D_IN]], align +// CHECK: call void @__kmpc_nvptx_end_reduce_nowait( +// CHECK: br label +// +// CHECK: ret + +// +// Reduction function +// CHECK: define internal void [[REDUCTION_FUNC:@.+]](i8* %0, i8* 
%1) +// CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[VAR1_RHS:%.+]] = load i8*, i8** [[VAR1_RHS_REF]], +// +// CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[VAR1_LHS:%.+]] = load i8*, i8** [[VAR1_LHS_REF]], +// +// CHECK: [[VAR2_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[VAR2_RHS_VOID:%.+]] = load i8*, i8** [[VAR2_RHS_REF]], +// CHECK: [[VAR2_RHS:%.+]] = bitcast i8* [[VAR2_RHS_VOID]] to float* +// +// CHECK: [[VAR2_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[VAR2_LHS_VOID:%.+]] = load i8*, i8** [[VAR2_LHS_REF]], +// CHECK: [[VAR2_LHS:%.+]] = bitcast i8* [[VAR2_LHS_VOID]] to float* +// +// CHECK: [[VAR1_LHS_VAL8:%.+]] = load i8, i8* [[VAR1_LHS]], +// CHECK: [[VAR1_LHS_VAL:%.+]] = sext i8 [[VAR1_LHS_VAL8]] to i32 +// CHECK: [[VAR1_RHS_VAL8:%.+]] = load i8, i8* [[VAR1_RHS]], +// CHECK: [[VAR1_RHS_VAL:%.+]] = sext i8 [[VAR1_RHS_VAL8]] to i32 +// CHECK: [[XOR:%.+]] = xor i32 [[VAR1_LHS_VAL]], [[VAR1_RHS_VAL]] +// CHECK: [[RES:%.+]] = trunc i32 [[XOR]] to i8 +// CHECK: store i8 [[RES]], i8* [[VAR1_LHS]], +// +// CHECK: [[VAR2_LHS_VAL:%.+]] = load float, float* [[VAR2_LHS]], +// CHECK: [[VAR2_RHS_VAL:%.+]] = load float, float* [[VAR2_RHS]], +// CHECK: [[RES:%.+]] = fmul float [[VAR2_LHS_VAL]], [[VAR2_RHS_VAL]] +// CHECK: store float [[RES]], float* [[VAR2_LHS]], +// CHECK: ret void + +// +// Shuffle and reduce function +// CHECK: define internal void [[SHUFFLE_REDUCE_FN]](i8* %0, i16 {{.*}}, i16 {{.*}}, i16 {{.*}}) +// CHECK: [[REMOTE_RED_LIST:%.+]] = alloca [[RLT]], align +// CHECK: [[REMOTE_ELT1:%.+]] = alloca i8 +// CHECK: [[REMOTE_ELT2:%.+]] = alloca float +// +// CHECK: [[LANEID:%.+]] = load i16, i16* {{.+}}, align +// CHECK: [[LANEOFFSET:%.+]] = load i16, i16* {{.+}}, align +// CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align +// +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align +// +// CHECK: [[ELT_CAST:%.+]] = sext i8 [[ELT_VAL]] to i32 +// CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 +// CHECK: [[REMOTE_ELT1_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_CAST]], i16 [[LANEOFFSET]], i16 [[WS]]) +// CHECK: [[REMOTE_ELT1_VAL:%.+]] = trunc i32 [[REMOTE_ELT1_VAL32]] to i8 +// +// CHECK: store i8 [[REMOTE_ELT1_VAL]], i8* [[REMOTE_ELT1]], align +// CHECK: store i8* [[REMOTE_ELT1]], i8** [[REMOTE_ELT_REF]], align +// +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* +// +// CHECK: [[ELT_CAST:%.+]] = bitcast float* [[ELT]] to i32* +// CHECK: [[REMOTE_ELT2_CAST:%.+]] = bitcast float* [[REMOTE_ELT2]] to i32* +// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT_CAST]], align +// CHECK: [[WS32:%.+]] = 
call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 +// CHECK: [[REMOTE_ELT2_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]]) +// +// CHECK: store i32 [[REMOTE_ELT2_VAL32]], i32* [[REMOTE_ELT2_CAST]], align +// CHECK: [[REMOTE_ELT2C:%.+]] = bitcast float* [[REMOTE_ELT2]] to i8* +// CHECK: store i8* [[REMOTE_ELT2C]], i8** [[REMOTE_ELT_REF]], align +// +// Condition to reduce +// CHECK: [[CONDALG0:%.+]] = icmp eq i16 [[ALGVER]], 0 +// +// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 +// CHECK: [[COND2:%.+]] = icmp ult i16 [[LANEID]], [[LANEOFFSET]] +// CHECK: [[CONDALG1:%.+]] = and i1 [[COND1]], [[COND2]] +// +// CHECK: [[COND3:%.+]] = icmp eq i16 [[ALGVER]], 2 +// CHECK: [[COND4:%.+]] = and i16 [[LANEID]], 1 +// CHECK: [[COND5:%.+]] = icmp eq i16 [[COND4]], 0 +// CHECK: [[COND6:%.+]] = and i1 [[COND3]], [[COND5]] +// CHECK: [[COND7:%.+]] = icmp sgt i16 [[LANEOFFSET]], 0 +// CHECK: [[CONDALG2:%.+]] = and i1 [[COND6]], [[COND7]] +// +// CHECK: [[COND8:%.+]] = or i1 [[CONDALG0]], [[CONDALG1]] +// CHECK: [[SHOULD_REDUCE:%.+]] = or i1 [[COND8]], [[CONDALG2]] +// CHECK: br i1 [[SHOULD_REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] +// +// CHECK: [[DO_REDUCE]] +// CHECK: [[RED_LIST1_VOID:%.+]] = bitcast [[RLT]]* [[RED_LIST]] to i8* +// CHECK: [[RED_LIST2_VOID:%.+]] = bitcast [[RLT]]* [[REMOTE_RED_LIST]] to i8* +// CHECK: call void [[REDUCTION_FUNC]](i8* [[RED_LIST1_VOID]], i8* [[RED_LIST2_VOID]]) +// CHECK: br label {{%?}}[[REDUCE_CONT:.+]] +// +// CHECK: [[REDUCE_ELSE]] +// CHECK: br label {{%?}}[[REDUCE_CONT]] +// +// CHECK: [[REDUCE_CONT]] +// Now check if we should just copy over the remote reduction list +// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 +// CHECK: [[COND2:%.+]] = icmp uge i16 [[LANEID]], [[LANEOFFSET]] +// CHECK: [[SHOULD_COPY:%.+]] = and i1 [[COND1]], [[COND2]] +// CHECK: br i1 [[SHOULD_COPY]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] +// +// CHECK: [[DO_COPY]] +// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i8, i8* [[REMOTE_ELT_VOID]], align +// CHECK: store i8 [[REMOTE_ELT_VAL]], i8* [[ELT_VOID]], align +// +// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float* +// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* +// CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align +// CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align +// CHECK: br label {{%?}}[[COPY_CONT:.+]] +// +// CHECK: [[COPY_ELSE]] +// CHECK: br label {{%?}}[[COPY_CONT]] +// +// CHECK: [[COPY_CONT]] +// CHECK: void + +// +// Inter warp copy function +// CHECK: define internal void [[WARP_COPY_FN]](i8* %0, i32 %1) +// CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31 +// CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5 
+// CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]* +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ +// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 +// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] +// +// [[DO_COPY]] +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// +// CHECK: [[MEDIUM_ELT64:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] +// CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT64]] to i8 addrspace([[SHARED_ADDRSPACE]])* +// CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align +// CHECK: store volatile i8 [[ELT_VAL]], i8 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align +// CHECK: br label {{%?}}[[COPY_CONT:.+]] +// +// CHECK: [[COPY_ELSE]] +// CHECK: br label {{%?}}[[COPY_CONT]] +// +// Barrier after copy to shared memory storage medium. +// CHECK: [[COPY_CONT]] +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ +// CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* +// +// Read into warp 0. +// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] +// CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] +// +// CHECK: [[DO_READ]] +// CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] +// CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i8 addrspace([[SHARED_ADDRSPACE]])* +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i8, i8 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align +// CHECK: store i8 [[MEDIUM_ELT_VAL]], i8* [[ELT_VOID]], align +// CHECK: br label {{%?}}[[READ_CONT:.+]] +// +// CHECK: [[READ_ELSE]] +// CHECK: br label {{%?}}[[READ_CONT]] +// +// CHECK: [[READ_CONT]] +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ +// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 +// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] +// +// [[DO_COPY]] +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* +// +// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] +// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align +// CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align +// CHECK: br label {{%?}}[[COPY_CONT:.+]] +// +// CHECK: [[COPY_ELSE]] +// CHECK: br label {{%?}}[[COPY_CONT]] +// +// Barrier after copy to shared memory storage medium. +// CHECK: [[COPY_CONT]] +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ +// CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* +// +// Read into warp 0. 
+// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] +// CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] +// +// CHECK: [[DO_READ]] +// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* +// CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i32, i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align +// CHECK: store i32 [[MEDIUM_ELT_VAL]], i32* [[ELT]], align +// CHECK: br label {{%?}}[[READ_CONT:.+]] +// +// CHECK: [[READ_ELSE]] +// CHECK: br label {{%?}}[[READ_CONT]] +// +// CHECK: [[READ_CONT]] +// CHECK: ret + +// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}( +// +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) +// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK: br label {{%?}}[[EXECUTE:.+]] +// +// CHECK: [[EXECUTE]] +// CHECK: {{call|invoke}} void [[PFN2:@.+]](i32* +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// +// +// define internal void [[PFN2]]( +// CHECK: store i32 0, i32* [[A:%.+]], align +// CHECK: store i16 -32768, i16* [[B:%.+]], align +// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A:%.+]], align +// CHECK: [[OR:%.+]] = or i32 [[A_VAL]], 1 +// CHECK: store i32 [[OR]], i32* [[A]], align +// CHECK: [[BV16:%.+]] = load i16, i16* [[B]], align +// CHECK: [[BV:%.+]] = sext i16 [[BV16]] to i32 +// CHECK: [[CMP:%.+]] = icmp sgt i32 99, [[BV]] +// CHECK: br i1 [[CMP]], label {{%?}}[[DO_MAX:.+]], label {{%?}}[[MAX_ELSE:.+]] +// +// CHECK: [[DO_MAX]] +// CHECK: br label {{%?}}[[MAX_CONT:.+]] +// +// CHECK: [[MAX_ELSE]] +// CHECK: [[BV:%.+]] = load i16, i16* [[B]], align +// CHECK: [[MAX:%.+]] = sext i16 [[BV]] to i32 +// CHECK: br label {{%?}}[[MAX_CONT]] +// +// CHECK: [[MAX_CONT]] +// CHECK: [[B_LVALUE:%.+]] = phi i32 [ 99, %[[DO_MAX]] ], [ [[MAX]], %[[MAX_ELSE]] ] +// CHECK: [[TRUNC:%.+]] = trunc i32 [[B_LVALUE]] to i16 +// CHECK: store i16 [[TRUNC]], i16* [[B]], align +// CHECK: [[PTR1:%.+]] = getelementptr inbounds [[RLT:.+]], [2 x i8*]* [[RL:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[A_CAST:%.+]] = bitcast i32* [[A]] to i8* +// CHECK: store i8* [[A_CAST]], i8** [[PTR1]], align +// CHECK: [[PTR2:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RL]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[B_CAST:%.+]] = bitcast i16* [[B]] to i8* +// CHECK: store i8* [[B_CAST]], i8** [[PTR2]], align +// CHECK: [[ARG_RL:%.+]] = bitcast [[RLT]]* [[RL]] to i8* +// CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @{{.+}}, i32 {{.+}}, i32 2, i{{32|64}} {{8|16}}, i8* [[ARG_RL]], void (i8*, i16, i16, i16)* [[SHUFFLE_REDUCE_FN:@.+]], void (i8*, i32)* [[WARP_COPY_FN:@.+]]) +// CHECK: [[CMP:%.+]] = icmp eq i32 [[RET]], 1 +// CHECK: br i1 [[CMP]], label + +// CHECK: [[A_INV:%.+]] = load i32, i32* [[A_IN:%.+]], align +// CHECK: [[AV:%.+]] = load i32, i32* [[A]], align +// CHECK: [[OR:%.+]] = or i32 [[A_INV]], [[AV]] +// CHECK: store i32 [[OR]], i32* [[A_IN]], align +// CHECK: [[B_INV16:%.+]] = load i16, i16* [[B_IN:%.+]], align +// CHECK: [[B_INV:%.+]] = sext i16 [[B_INV16]] to i32 +// CHECK: [[BV16:%.+]] = load i16, i16* [[B]], align +// CHECK: [[BV:%.+]] = sext i16 [[BV16]] to i32 +// CHECK: [[CMP:%.+]] = 
icmp sgt i32 [[B_INV]], [[BV]] +// CHECK: br i1 [[CMP]], label {{%?}}[[DO_MAX:.+]], label {{%?}}[[MAX_ELSE:.+]] +// +// CHECK: [[DO_MAX]] +// CHECK: [[MAX1:%.+]] = load i16, i16* [[B_IN]], align +// CHECK: br label {{%?}}[[MAX_CONT:.+]] +// +// CHECK: [[MAX_ELSE]] +// CHECK: [[MAX2:%.+]] = load i16, i16* [[B]], align +// CHECK: br label {{%?}}[[MAX_CONT]] +// +// CHECK: [[MAX_CONT]] +// CHECK: [[B_MAX:%.+]] = phi i16 [ [[MAX1]], %[[DO_MAX]] ], [ [[MAX2]], %[[MAX_ELSE]] ] +// CHECK: store i16 [[B_MAX]], i16* [[B_IN]], align +// CHECK: call void @__kmpc_nvptx_end_reduce_nowait( +// CHECK: br label +// +// CHECK: ret + +// +// Reduction function +// CHECK: define internal void [[REDUCTION_FUNC:@.+]](i8* %0, i8* %1) +// CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[VAR1_RHS_VOID:%.+]] = load i8*, i8** [[VAR1_RHS_REF]], +// CHECK: [[VAR1_RHS:%.+]] = bitcast i8* [[VAR1_RHS_VOID]] to i32* +// +// CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[VAR1_LHS_VOID:%.+]] = load i8*, i8** [[VAR1_LHS_REF]], +// CHECK: [[VAR1_LHS:%.+]] = bitcast i8* [[VAR1_LHS_VOID]] to i32* +// +// CHECK: [[VAR2_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[VAR2_RHS_VOID:%.+]] = load i8*, i8** [[VAR2_RHS_REF]], +// CHECK: [[VAR2_RHS:%.+]] = bitcast i8* [[VAR2_RHS_VOID]] to i16* +// +// CHECK: [[VAR2_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[VAR2_LHS_VOID:%.+]] = load i8*, i8** [[VAR2_LHS_REF]], +// CHECK: [[VAR2_LHS:%.+]] = bitcast i8* [[VAR2_LHS_VOID]] to i16* +// +// CHECK: [[VAR1_LHS_VAL:%.+]] = load i32, i32* [[VAR1_LHS]], +// CHECK: [[VAR1_RHS_VAL:%.+]] = load i32, i32* [[VAR1_RHS]], +// CHECK: [[OR:%.+]] = or i32 [[VAR1_LHS_VAL]], [[VAR1_RHS_VAL]] +// CHECK: store i32 [[OR]], i32* [[VAR1_LHS]], +// +// CHECK: [[VAR2_LHS_VAL16:%.+]] = load i16, i16* [[VAR2_LHS]], +// CHECK: [[VAR2_LHS_VAL:%.+]] = sext i16 [[VAR2_LHS_VAL16]] to i32 +// CHECK: [[VAR2_RHS_VAL16:%.+]] = load i16, i16* [[VAR2_RHS]], +// CHECK: [[VAR2_RHS_VAL:%.+]] = sext i16 [[VAR2_RHS_VAL16]] to i32 +// +// CHECK: [[CMP:%.+]] = icmp sgt i32 [[VAR2_LHS_VAL]], [[VAR2_RHS_VAL]] +// CHECK: br i1 [[CMP]], label {{%?}}[[DO_MAX:.+]], label {{%?}}[[MAX_ELSE:.+]] +// +// CHECK: [[DO_MAX]] +// CHECK: [[MAX1:%.+]] = load i16, i16* [[VAR2_LHS]], align +// CHECK: br label {{%?}}[[MAX_CONT:.+]] +// +// CHECK: [[MAX_ELSE]] +// CHECK: [[MAX2:%.+]] = load i16, i16* [[VAR2_RHS]], align +// CHECK: br label {{%?}}[[MAX_CONT]] +// +// CHECK: [[MAX_CONT]] +// CHECK: [[MAXV:%.+]] = phi i16 [ [[MAX1]], %[[DO_MAX]] ], [ [[MAX2]], %[[MAX_ELSE]] ] +// CHECK: store i16 [[MAXV]], i16* [[VAR2_LHS]], +// CHECK: ret void + +// +// Shuffle and reduce function +// CHECK: define internal void [[SHUFFLE_REDUCE_FN]](i8* %0, i16 {{.*}}, i16 {{.*}}, i16 {{.*}}) +// CHECK: [[REMOTE_RED_LIST:%.+]] = alloca [[RLT]], align +// CHECK: [[REMOTE_ELT1:%.+]] = alloca i32 +// CHECK: [[REMOTE_ELT2:%.+]] = alloca i16 +// +// CHECK: [[LANEID:%.+]] = load i16, i16* {{.+}}, align +// CHECK: [[LANEOFFSET:%.+]] = load i16, i16* {{.+}}, align +// CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align +// +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: 
[[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* +// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align +// +// CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 +// CHECK: [[REMOTE_ELT1_VAL:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]]) +// +// CHECK: store i32 [[REMOTE_ELT1_VAL]], i32* [[REMOTE_ELT1]], align +// CHECK: [[REMOTE_ELT1C:%.+]] = bitcast i32* [[REMOTE_ELT1]] to i8* +// CHECK: store i8* [[REMOTE_ELT1C]], i8** [[REMOTE_ELT_REF]], align +// +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* +// CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align +// +// CHECK: [[ELT_CAST:%.+]] = sext i16 [[ELT_VAL]] to i32 +// CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 +// CHECK: [[REMOTE_ELT2_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_CAST]], i16 [[LANEOFFSET]], i16 [[WS]]) +// CHECK: [[REMOTE_ELT2_VAL:%.+]] = trunc i32 [[REMOTE_ELT2_VAL32]] to i16 +// +// CHECK: store i16 [[REMOTE_ELT2_VAL]], i16* [[REMOTE_ELT2]], align +// CHECK: [[REMOTE_ELT2C:%.+]] = bitcast i16* [[REMOTE_ELT2]] to i8* +// CHECK: store i8* [[REMOTE_ELT2C]], i8** [[REMOTE_ELT_REF]], align +// +// Condition to reduce +// CHECK: [[CONDALG0:%.+]] = icmp eq i16 [[ALGVER]], 0 +// +// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 +// CHECK: [[COND2:%.+]] = icmp ult i16 [[LANEID]], [[LANEOFFSET]] +// CHECK: [[CONDALG1:%.+]] = and i1 [[COND1]], [[COND2]] +// +// CHECK: [[COND3:%.+]] = icmp eq i16 [[ALGVER]], 2 +// CHECK: [[COND4:%.+]] = and i16 [[LANEID]], 1 +// CHECK: [[COND5:%.+]] = icmp eq i16 [[COND4]], 0 +// CHECK: [[COND6:%.+]] = and i1 [[COND3]], [[COND5]] +// CHECK: [[COND7:%.+]] = icmp sgt i16 [[LANEOFFSET]], 0 +// CHECK: [[CONDALG2:%.+]] = and i1 [[COND6]], [[COND7]] +// +// CHECK: [[COND8:%.+]] = or i1 [[CONDALG0]], [[CONDALG1]] +// CHECK: [[SHOULD_REDUCE:%.+]] = or i1 [[COND8]], [[CONDALG2]] +// CHECK: br i1 [[SHOULD_REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] +// +// CHECK: [[DO_REDUCE]] +// CHECK: [[RED_LIST1_VOID:%.+]] = bitcast [[RLT]]* [[RED_LIST]] to i8* +// CHECK: [[RED_LIST2_VOID:%.+]] = bitcast [[RLT]]* [[REMOTE_RED_LIST]] to i8* +// CHECK: call void [[REDUCTION_FUNC]](i8* [[RED_LIST1_VOID]], i8* [[RED_LIST2_VOID]]) +// CHECK: br label {{%?}}[[REDUCE_CONT:.+]] +// +// CHECK: [[REDUCE_ELSE]] +// CHECK: br label {{%?}}[[REDUCE_CONT]] +// +// CHECK: [[REDUCE_CONT]] +// Now check if we should just copy over the remote reduction list +// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 +// CHECK: [[COND2:%.+]] = icmp uge i16 [[LANEID]], [[LANEOFFSET]] +// CHECK: [[SHOULD_COPY:%.+]] = and i1 [[COND1]], [[COND2]] +// CHECK: br i1 [[SHOULD_COPY]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] +// +// CHECK: [[DO_COPY]] +// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds 
[[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0
+// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
+// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32*
+// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
+// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align
+// CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align
+//
+// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
+// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
+// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
+// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
+// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16*
+// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
+// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align
+// CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align
+// CHECK: br label {{%?}}[[COPY_CONT:.+]]
+//
+// CHECK: [[COPY_ELSE]]
+// CHECK: br label {{%?}}[[COPY_CONT]]
+//
+// CHECK: [[COPY_CONT]]
+// CHECK: void
+
+//
+// Inter warp copy function
+// CHECK: define internal void [[WARP_COPY_FN]](i8* %0, i32 %1)
+// CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31
+// CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5
+// CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]*
+// CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0
+// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]]
+//
+// CHECK: [[DO_COPY]]
+// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0
+// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
+// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
+//
+// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]]
+// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align
+// CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align
+// CHECK: br label {{%?}}[[COPY_CONT:.+]]
+//
+// CHECK: [[COPY_ELSE]]
+// CHECK: br label {{%?}}[[COPY_CONT]]
+//
+// Barrier after copy to shared memory storage medium.
+// CHECK: [[COPY_CONT]]
+// CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+// CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32*
+//
+// Read into warp 0.
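+// Only the first ACTIVE_WARPS threads of warp 0 participate: each one reads
+// one warp's partial result back from the shared transfer medium into
+// element 0 (the i32 operand) of the reduce list.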
+// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]]
+// CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]]
+//
+// CHECK: [[DO_READ]]
+// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]]
+// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0
+// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
+// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
+// CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i32, i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align
+// CHECK: store i32 [[MEDIUM_ELT_VAL]], i32* [[ELT]], align
+// CHECK: br label {{%?}}[[READ_CONT:.+]]
+//
+// CHECK: [[READ_ELSE]]
+// CHECK: br label {{%?}}[[READ_CONT]]
+//
+// CHECK: [[READ_CONT]]
+// CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0
+// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]]
+//
+// CHECK: [[DO_COPY]]
+// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
+// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
+// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
+//
+// CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]]
+// CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i16 addrspace([[SHARED_ADDRSPACE]])*
+// CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align
+// CHECK: store volatile i16 [[ELT_VAL]], i16 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align
+// CHECK: br label {{%?}}[[COPY_CONT:.+]]
+//
+// CHECK: [[COPY_ELSE]]
+// CHECK: br label {{%?}}[[COPY_CONT]]
+//
+// Barrier after copy to shared memory storage medium.
+// CHECK: [[COPY_CONT]]
+// CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+// CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32*
+//
+// Read into warp 0.
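+// Same read-back for element 1: the i16 operand shares the 32-bit transfer
+// slot, so the slot pointer is bitcast to i16 in the shared address space
+// before the volatile load.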
+// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] +// CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] +// +// CHECK: [[DO_READ]] +// CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] +// CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i16 addrspace([[SHARED_ADDRSPACE]])* +// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 1 +// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* +// CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i16, i16 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align +// CHECK: store i16 [[MEDIUM_ELT_VAL]], i16* [[ELT]], align +// CHECK: br label {{%?}}[[READ_CONT:.+]] +// +// CHECK: [[READ_ELSE]] +// CHECK: br label {{%?}}[[READ_CONT]] +// +// CHECK: [[READ_CONT]] +// CHECK: ret #endif diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp index 168b82057cacf..b35609cc5e92e 100644 --- a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp @@ -61,28 +61,28 @@ int bar(int n){ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l32}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l37}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l42}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l47}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini // CHECK-NOT: call void @__kmpc_nvptx_end_reduce_nowait( diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp index 8ff393f074e4a..3a6e39dfdba11 100644 --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -232,7 +232,7 @@ int bar(int n){ // CHECK: ret void // CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack( // CHECK-NOT: call void @__kmpc_serialized_parallel( diff --git 
a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index 4fd0f71c5e5db..d40aad3dee77f 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -100,7 +100,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l50( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0) // CHECK: call void [[PARALLEL:@.+]]( // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) @@ -128,7 +128,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void @@ -143,7 +143,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void @@ -159,7 +159,7 @@ int bar(int n){ // Distribute with collapse(2) // CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index a933c7e021b82..e9126fce70204 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -83,7 +83,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // SEQ: [[SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]], @@ -109,7 +109,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, @@ -124,7 +124,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call 
void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, @@ -140,7 +140,7 @@ int bar(int n){ // CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp index 8d12c857cb434..15f5f09f38993 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -70,7 +70,7 @@ int bar(int n){ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l37( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, @@ -78,7 +78,7 @@ int bar(int n){ // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, @@ -86,7 +86,7 @@ int bar(int n){ // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l48( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, @@ -95,7 +95,7 @@ int bar(int n){ // CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l53({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0) // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h index 6d5d6cd19bd6e..0ccd71c3b55f2 100644 --- a/openmp/libomptarget/deviceRTLs/common/omptarget.h +++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h @@ -92,15 +92,7 @@ struct __kmpc_data_sharing_worker_slot_static { void *DataEnd; char Data[DS_Worker_Warp_Slot_Size]; }; -// Additional master slot type which is initialized with the default master slot -// size of 4 bytes. 
-struct __kmpc_data_sharing_master_slot_static { - __kmpc_data_sharing_slot *Next; - __kmpc_data_sharing_slot *Prev; - void *PrevSlotStackPtr; - void *DataEnd; - char Data[DS_Slot_Size]; -}; + extern DEVICE SHARED DataSharingStateTy DataSharingState; //////////////////////////////////////////////////////////////////////////////// @@ -204,37 +196,6 @@ class omptarget_nvptx_TeamDescr { // init INLINE void InitTeamDescr(); - INLINE __kmpc_data_sharing_slot *RootS(int wid, bool IsMasterThread) { - // If this is invoked by the master thread of the master warp then - // initialize it with a smaller slot. - if (IsMasterThread) { - // Do not initialize this slot again if it has already been initalized. - if (master_rootS[0].DataEnd == &master_rootS[0].Data[0] + DS_Slot_Size) - return 0; - // Initialize the pointer to the end of the slot given the size of the - // data section. DataEnd is non-inclusive. - master_rootS[0].DataEnd = &master_rootS[0].Data[0] + DS_Slot_Size; - // We currently do not have a next slot. - master_rootS[0].Next = 0; - master_rootS[0].Prev = 0; - master_rootS[0].PrevSlotStackPtr = 0; - return (__kmpc_data_sharing_slot *)&master_rootS[0]; - } - // Do not initialize this slot again if it has already been initalized. - if (worker_rootS[wid].DataEnd == - &worker_rootS[wid].Data[0] + DS_Worker_Warp_Slot_Size) - return 0; - // Initialize the pointer to the end of the slot given the size of the data - // section. DataEnd is non-inclusive. - worker_rootS[wid].DataEnd = - &worker_rootS[wid].Data[0] + DS_Worker_Warp_Slot_Size; - // We currently do not have a next slot. - worker_rootS[wid].Next = 0; - worker_rootS[wid].Prev = 0; - worker_rootS[wid].PrevSlotStackPtr = 0; - return (__kmpc_data_sharing_slot *)&worker_rootS[wid]; - } - INLINE __kmpc_data_sharing_slot *GetPreallocatedSlotAddr(int wid) { worker_rootS[wid].DataEnd = &worker_rootS[wid].Data[0] + DS_Worker_Warp_Slot_Size; @@ -253,7 +214,6 @@ class omptarget_nvptx_TeamDescr { ALIGN(16) __kmpc_data_sharing_worker_slot_static worker_rootS[DS_Max_Warp_Number]; - ALIGN(16) __kmpc_data_sharing_master_slot_static master_rootS[1]; }; //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu index 6c1d5319595c5..5ccc845394003 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu @@ -77,8 +77,7 @@ EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized) { omptarget_nvptx_workFn = 0; } -EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, - int16_t RequiresDataSharing) { +EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime) { PRINT0(LD_IO, "call to __kmpc_spmd_kernel_init\n"); setExecutionParameters(Spmd, RequiresOMPRuntime ? RuntimeInitialized @@ -134,15 +133,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, "thread will execute parallel region with id %d in a team of " "%d threads\n", (int)newTaskDescr->ThreadId(), (int)ThreadLimit); - - if (RequiresDataSharing && GetLaneId() == 0) { - // Warp master initializes data sharing environment. 
- unsigned WID = threadId / WARPSIZE; - __kmpc_data_sharing_slot *RootS = currTeamDescr.RootS( - WID, WID == WARPSIZE - 1); - DataSharingState.SlotPtr[WID] = RootS; - DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0]; - } } EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) { diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h index 4d352bc648fa3..330880556293e 100644 --- a/openmp/libomptarget/deviceRTLs/interface.h +++ b/openmp/libomptarget/deviceRTLs/interface.h @@ -421,8 +421,8 @@ EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid, // non standard EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime); EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); -EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, - int16_t RequiresDataSharing); +EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, + int16_t RequiresOMPRuntime); EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn); EXTERN bool __kmpc_kernel_parallel(void **WorkFn); From 8bb702a8ad30205d46818c1e3b1260c19222ba9b Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 1 Oct 2020 16:37:55 +0100 Subject: [PATCH 120/321] [SVE] Lower fixed length vector floating point rounding operations. Adds lowering for: llvm.ceil llvm.floor llvm.nearbyint llvm.rint llvm.round llvm.trunc Differential Revision: https://reviews.llvm.org/D88671 --- .../Target/AArch64/AArch64ISelLowering.cpp | 6 + .../AArch64/sve-fixed-length-fp-rounding.ll | 1619 +++++++++++++++++ 2 files changed, 1625 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d799e1d2e64c1..d76918d8ace4a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1226,12 +1226,18 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::AND, VT, Custom); setOperationAction(ISD::ANY_EXTEND, VT, Custom); setOperationAction(ISD::FADD, VT, Custom); + setOperationAction(ISD::FCEIL, VT, Custom); setOperationAction(ISD::FDIV, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Custom); setOperationAction(ISD::FMA, VT, Custom); setOperationAction(ISD::FMAXNUM, VT, Custom); setOperationAction(ISD::FMINNUM, VT, Custom); setOperationAction(ISD::FMUL, VT, Custom); + setOperationAction(ISD::FNEARBYINT, VT, Custom); + setOperationAction(ISD::FRINT, VT, Custom); + setOperationAction(ISD::FROUND, VT, Custom); setOperationAction(ISD::FSUB, VT, Custom); + setOperationAction(ISD::FTRUNC, VT, Custom); setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::MUL, VT, Custom); setOperationAction(ISD::OR, VT, Custom); diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll new file mode 100644 index 0000000000000..a3a677a5d9f82 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll @@ -0,0 +1,1619 @@ +; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE +; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256 +; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK +; RUN: llc 
-aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048 + +target triple = "aarch64-unknown-linux-gnu" + +; Don't use SVE when its registers are no bigger than NEON. +; NO_SVE-NOT: ptrue + +; +; CEIL -> FRINTP +; + +; Don't use SVE for 64-bit vectors. +define <4 x half> @frintp_v4f16(<4 x half> %op) #0 { +; CHECK-LABEL: frintp_v4f16: +; CHECK: frintp v0.4h, v0.4h +; CHECK-NEXT: ret + %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op) + ret <4 x half> %res +} + +; Don't use SVE for 128-bit vectors. +define <8 x half> @frintp_v8f16(<8 x half> %op) #0 { +; CHECK-LABEL: frintp_v8f16: +; CHECK: frintp v0.8h, v0.8h +; CHECK-NEXT: ret + %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op) + ret <8 x half> %res +} + +define void @frintp_v16f16(<16 x half>* %a) #0 { +; CHECK-LABEL: frintp_v16f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <16 x half>, <16 x half>* %a + %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op) + store <16 x half> %res, <16 x half>* %a + ret void +} + +define void @frintp_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: frintp_v32f16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
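+; With 256-bit SVE registers a <32 x half> operation is legalised as two
+; <16 x half> halves; 16 halves are 32 bytes, so the high half is addressed
+; at x0 plus #32 below.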
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h +; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h +; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <32 x half>, <32 x half>* %a + %res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op) + store <32 x half> %res, <32 x half>* %a + ret void +} + +define void @frintp_v64f16(<64 x half>* %a) #0 { +; CHECK-LABEL: frintp_v64f16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <64 x half>, <64 x half>* %a + %res = call <64 x half> @llvm.ceil.v64f16(<64 x half> %op) + store <64 x half> %res, <64 x half>* %a + ret void +} + +define void @frintp_v128f16(<128 x half>* %a) #0 { +; CHECK-LABEL: frintp_v128f16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <128 x half>, <128 x half>* %a + %res = call <128 x half> @llvm.ceil.v128f16(<128 x half> %op) + store <128 x half> %res, <128 x half>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x float> @frintp_v2f32(<2 x float> %op) #0 { +; CHECK-LABEL: frintp_v2f32: +; CHECK: frintp v0.2s, v0.2s +; CHECK-NEXT: ret + %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op) + ret <2 x float> %res +} + +; Don't use SVE for 128-bit vectors. +define <4 x float> @frintp_v4f32(<4 x float> %op) #0 { +; CHECK-LABEL: frintp_v4f32: +; CHECK: frintp v0.4s, v0.4s +; CHECK-NEXT: ret + %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op) + ret <4 x float> %res +} + +define void @frintp_v8f32(<8 x float>* %a) #0 { +; CHECK-LABEL: frintp_v8f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <8 x float>, <8 x float>* %a + %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op) + store <8 x float> %res, <8 x float>* %a + ret void +} + +define void @frintp_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: frintp_v16f32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s +; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s +; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <16 x float>, <16 x float>* %a + %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op) + store <16 x float> %res, <16 x float>* %a + ret void +} + +define void @frintp_v32f32(<32 x float>* %a) #0 { +; CHECK-LABEL: frintp_v32f32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <32 x float>, <32 x float>* %a + %res = call <32 x float> @llvm.ceil.v32f32(<32 x float> %op) + store <32 x float> %res, <32 x float>* %a + ret void +} + +define void @frintp_v64f32(<64 x float>* %a) #0 { +; CHECK-LABEL: frintp_v64f32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <64 x float>, <64 x float>* %a + %res = call <64 x float> @llvm.ceil.v64f32(<64 x float> %op) + store <64 x float> %res, <64 x float>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x double> @frintp_v1f64(<1 x double> %op) #0 { +; CHECK-LABEL: frintp_v1f64: +; CHECK: frintp d0, d0 +; CHECK-NEXT: ret + %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op) + ret <1 x double> %res +} + +; Don't use SVE for 128-bit vectors. +define <2 x double> @frintp_v2f64(<2 x double> %op) #0 { +; CHECK-LABEL: frintp_v2f64: +; CHECK: frintp v0.2d, v0.2d +; CHECK-NEXT: ret + %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op) + ret <2 x double> %res +} + +define void @frintp_v4f64(<4 x double>* %a) #0 { +; CHECK-LABEL: frintp_v4f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <4 x double>, <4 x double>* %a + %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op) + store <4 x double> %res, <4 x double>* %a + ret void +} + +define void @frintp_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: frintp_v8f64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d +; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d +; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <8 x double>, <8 x double>* %a + %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op) + store <8 x double> %res, <8 x double>* %a + ret void +} + +define void @frintp_v16f64(<16 x double>* %a) #0 { +; CHECK-LABEL: frintp_v16f64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <16 x double>, <16 x double>* %a + %res = call <16 x double> @llvm.ceil.v16f64(<16 x double> %op) + store <16 x double> %res, <16 x double>* %a + ret void +} + +define void @frintp_v32f64(<32 x double>* %a) #0 { +; CHECK-LABEL: frintp_v32f64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <32 x double>, <32 x double>* %a + %res = call <32 x double> @llvm.ceil.v32f64(<32 x double> %op) + store <32 x double> %res, <32 x double>* %a + ret void +} + +; +; FLOOR -> FRINTM +; + +; Don't use SVE for 64-bit vectors. +define <4 x half> @frintm_v4f16(<4 x half> %op) #0 { +; CHECK-LABEL: frintm_v4f16: +; CHECK: frintm v0.4h, v0.4h +; CHECK-NEXT: ret + %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op) + ret <4 x half> %res +} + +; Don't use SVE for 128-bit vectors. +define <8 x half> @frintm_v8f16(<8 x half> %op) #0 { +; CHECK-LABEL: frintm_v8f16: +; CHECK: frintm v0.8h, v0.8h +; CHECK-NEXT: ret + %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op) + ret <8 x half> %res +} + +define void @frintm_v16f16(<16 x half>* %a) #0 { +; CHECK-LABEL: frintm_v16f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <16 x half>, <16 x half>* %a + %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op) + store <16 x half> %res, <16 x half>* %a + ret void +} + +define void @frintm_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: frintm_v32f16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h +; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h +; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <32 x half>, <32 x half>* %a + %res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op) + store <32 x half> %res, <32 x half>* %a + ret void +} + +define void @frintm_v64f16(<64 x half>* %a) #0 { +; CHECK-LABEL: frintm_v64f16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <64 x half>, <64 x half>* %a + %res = call <64 x half> @llvm.floor.v64f16(<64 x half> %op) + store <64 x half> %res, <64 x half>* %a + ret void +} + +define void @frintm_v128f16(<128 x half>* %a) #0 { +; CHECK-LABEL: frintm_v128f16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <128 x half>, <128 x half>* %a + %res = call <128 x half> @llvm.floor.v128f16(<128 x half> %op) + store <128 x half> %res, <128 x half>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x float> @frintm_v2f32(<2 x float> %op) #0 { +; CHECK-LABEL: frintm_v2f32: +; CHECK: frintm v0.2s, v0.2s +; CHECK-NEXT: ret + %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op) + ret <2 x float> %res +} + +; Don't use SVE for 128-bit vectors. +define <4 x float> @frintm_v4f32(<4 x float> %op) #0 { +; CHECK-LABEL: frintm_v4f32: +; CHECK: frintm v0.4s, v0.4s +; CHECK-NEXT: ret + %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op) + ret <4 x float> %res +} + +define void @frintm_v8f32(<8 x float>* %a) #0 { +; CHECK-LABEL: frintm_v8f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <8 x float>, <8 x float>* %a + %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op) + store <8 x float> %res, <8 x float>* %a + ret void +} + +define void @frintm_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: frintm_v16f32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s +; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s +; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <16 x float>, <16 x float>* %a + %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op) + store <16 x float> %res, <16 x float>* %a + ret void +} + +define void @frintm_v32f32(<32 x float>* %a) #0 { +; CHECK-LABEL: frintm_v32f32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <32 x float>, <32 x float>* %a + %res = call <32 x float> @llvm.floor.v32f32(<32 x float> %op) + store <32 x float> %res, <32 x float>* %a + ret void +} + +define void @frintm_v64f32(<64 x float>* %a) #0 { +; CHECK-LABEL: frintm_v64f32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <64 x float>, <64 x float>* %a + %res = call <64 x float> @llvm.floor.v64f32(<64 x float> %op) + store <64 x float> %res, <64 x float>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x double> @frintm_v1f64(<1 x double> %op) #0 { +; CHECK-LABEL: frintm_v1f64: +; CHECK: frintm d0, d0 +; CHECK-NEXT: ret + %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op) + ret <1 x double> %res +} + +; Don't use SVE for 128-bit vectors. +define <2 x double> @frintm_v2f64(<2 x double> %op) #0 { +; CHECK-LABEL: frintm_v2f64: +; CHECK: frintm v0.2d, v0.2d +; CHECK-NEXT: ret + %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op) + ret <2 x double> %res +} + +define void @frintm_v4f64(<4 x double>* %a) #0 { +; CHECK-LABEL: frintm_v4f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <4 x double>, <4 x double>* %a + %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op) + store <4 x double> %res, <4 x double>* %a + ret void +} + +define void @frintm_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: frintm_v8f64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d +; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d +; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <8 x double>, <8 x double>* %a + %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op) + store <8 x double> %res, <8 x double>* %a + ret void +} + +define void @frintm_v16f64(<16 x double>* %a) #0 { +; CHECK-LABEL: frintm_v16f64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <16 x double>, <16 x double>* %a + %res = call <16 x double> @llvm.floor.v16f64(<16 x double> %op) + store <16 x double> %res, <16 x double>* %a + ret void +} + +define void @frintm_v32f64(<32 x double>* %a) #0 { +; CHECK-LABEL: frintm_v32f64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <32 x double>, <32 x double>* %a + %res = call <32 x double> @llvm.floor.v32f64(<32 x double> %op) + store <32 x double> %res, <32 x double>* %a + ret void +} + +; +; FNEARBYINT -> FRINTI +; + +; Don't use SVE for 64-bit vectors. +define <4 x half> @frinti_v4f16(<4 x half> %op) #0 { +; CHECK-LABEL: frinti_v4f16: +; CHECK: frinti v0.4h, v0.4h +; CHECK-NEXT: ret + %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op) + ret <4 x half> %res +} + +; Don't use SVE for 128-bit vectors. +define <8 x half> @frinti_v8f16(<8 x half> %op) #0 { +; CHECK-LABEL: frinti_v8f16: +; CHECK: frinti v0.8h, v0.8h +; CHECK-NEXT: ret + %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op) + ret <8 x half> %res +} + +define void @frinti_v16f16(<16 x half>* %a) #0 { +; CHECK-LABEL: frinti_v16f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <16 x half>, <16 x half>* %a + %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op) + store <16 x half> %res, <16 x half>* %a + ret void +} + +define void @frinti_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: frinti_v32f16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h +; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h +; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <32 x half>, <32 x half>* %a + %res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op) + store <32 x half> %res, <32 x half>* %a + ret void +} + +define void @frinti_v64f16(<64 x half>* %a) #0 { +; CHECK-LABEL: frinti_v64f16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <64 x half>, <64 x half>* %a + %res = call <64 x half> @llvm.nearbyint.v64f16(<64 x half> %op) + store <64 x half> %res, <64 x half>* %a + ret void +} + +define void @frinti_v128f16(<128 x half>* %a) #0 { +; CHECK-LABEL: frinti_v128f16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <128 x half>, <128 x half>* %a + %res = call <128 x half> @llvm.nearbyint.v128f16(<128 x half> %op) + store <128 x half> %res, <128 x half>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x float> @frinti_v2f32(<2 x float> %op) #0 { +; CHECK-LABEL: frinti_v2f32: +; CHECK: frinti v0.2s, v0.2s +; CHECK-NEXT: ret + %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op) + ret <2 x float> %res +} + +; Don't use SVE for 128-bit vectors. +define <4 x float> @frinti_v4f32(<4 x float> %op) #0 { +; CHECK-LABEL: frinti_v4f32: +; CHECK: frinti v0.4s, v0.4s +; CHECK-NEXT: ret + %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op) + ret <4 x float> %res +} + +define void @frinti_v8f32(<8 x float>* %a) #0 { +; CHECK-LABEL: frinti_v8f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <8 x float>, <8 x float>* %a + %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op) + store <8 x float> %res, <8 x float>* %a + ret void +} + +define void @frinti_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: frinti_v16f32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s +; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s +; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <16 x float>, <16 x float>* %a + %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op) + store <16 x float> %res, <16 x float>* %a + ret void +} + +define void @frinti_v32f32(<32 x float>* %a) #0 { +; CHECK-LABEL: frinti_v32f32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <32 x float>, <32 x float>* %a + %res = call <32 x float> @llvm.nearbyint.v32f32(<32 x float> %op) + store <32 x float> %res, <32 x float>* %a + ret void +} + +define void @frinti_v64f32(<64 x float>* %a) #0 { +; CHECK-LABEL: frinti_v64f32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <64 x float>, <64 x float>* %a + %res = call <64 x float> @llvm.nearbyint.v64f32(<64 x float> %op) + store <64 x float> %res, <64 x float>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x double> @frinti_v1f64(<1 x double> %op) #0 { +; CHECK-LABEL: frinti_v1f64: +; CHECK: frinti d0, d0 +; CHECK-NEXT: ret + %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op) + ret <1 x double> %res +} + +; Don't use SVE for 128-bit vectors. +define <2 x double> @frinti_v2f64(<2 x double> %op) #0 { +; CHECK-LABEL: frinti_v2f64: +; CHECK: frinti v0.2d, v0.2d +; CHECK-NEXT: ret + %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op) + ret <2 x double> %res +} + +define void @frinti_v4f64(<4 x double>* %a) #0 { +; CHECK-LABEL: frinti_v4f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <4 x double>, <4 x double>* %a + %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op) + store <4 x double> %res, <4 x double>* %a + ret void +} + +define void @frinti_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: frinti_v8f64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d +; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d +; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <8 x double>, <8 x double>* %a + %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op) + store <8 x double> %res, <8 x double>* %a + ret void +} + +define void @frinti_v16f64(<16 x double>* %a) #0 { +; CHECK-LABEL: frinti_v16f64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <16 x double>, <16 x double>* %a + %res = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> %op) + store <16 x double> %res, <16 x double>* %a + ret void +} + +define void @frinti_v32f64(<32 x double>* %a) #0 { +; CHECK-LABEL: frinti_v32f64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <32 x double>, <32 x double>* %a + %res = call <32 x double> @llvm.nearbyint.v32f64(<32 x double> %op) + store <32 x double> %res, <32 x double>* %a + ret void +} + +; +; RINT -> FRINTX +; + +; Don't use SVE for 64-bit vectors. +define <4 x half> @frintx_v4f16(<4 x half> %op) #0 { +; CHECK-LABEL: frintx_v4f16: +; CHECK: frintx v0.4h, v0.4h +; CHECK-NEXT: ret + %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op) + ret <4 x half> %res +} + +; Don't use SVE for 128-bit vectors. +define <8 x half> @frintx_v8f16(<8 x half> %op) #0 { +; CHECK-LABEL: frintx_v8f16: +; CHECK: frintx v0.8h, v0.8h +; CHECK-NEXT: ret + %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op) + ret <8 x half> %res +} + +define void @frintx_v16f16(<16 x half>* %a) #0 { +; CHECK-LABEL: frintx_v16f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <16 x half>, <16 x half>* %a + %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op) + store <16 x half> %res, <16 x half>* %a + ret void +} + +define void @frintx_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: frintx_v32f16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h +; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h +; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <32 x half>, <32 x half>* %a + %res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op) + store <32 x half> %res, <32 x half>* %a + ret void +} + +define void @frintx_v64f16(<64 x half>* %a) #0 { +; CHECK-LABEL: frintx_v64f16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <64 x half>, <64 x half>* %a + %res = call <64 x half> @llvm.rint.v64f16(<64 x half> %op) + store <64 x half> %res, <64 x half>* %a + ret void +} + +define void @frintx_v128f16(<128 x half>* %a) #0 { +; CHECK-LABEL: frintx_v128f16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <128 x half>, <128 x half>* %a + %res = call <128 x half> @llvm.rint.v128f16(<128 x half> %op) + store <128 x half> %res, <128 x half>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x float> @frintx_v2f32(<2 x float> %op) #0 { +; CHECK-LABEL: frintx_v2f32: +; CHECK: frintx v0.2s, v0.2s +; CHECK-NEXT: ret + %res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op) + ret <2 x float> %res +} + +; Don't use SVE for 128-bit vectors. +define <4 x float> @frintx_v4f32(<4 x float> %op) #0 { +; CHECK-LABEL: frintx_v4f32: +; CHECK: frintx v0.4s, v0.4s +; CHECK-NEXT: ret + %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op) + ret <4 x float> %res +} + +define void @frintx_v8f32(<8 x float>* %a) #0 { +; CHECK-LABEL: frintx_v8f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <8 x float>, <8 x float>* %a + %res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op) + store <8 x float> %res, <8 x float>* %a + ret void +} + +define void @frintx_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: frintx_v16f32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s +; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s +; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <16 x float>, <16 x float>* %a + %res = call <16 x float> @llvm.rint.v16f32(<16 x float> %op) + store <16 x float> %res, <16 x float>* %a + ret void +} + +define void @frintx_v32f32(<32 x float>* %a) #0 { +; CHECK-LABEL: frintx_v32f32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <32 x float>, <32 x float>* %a + %res = call <32 x float> @llvm.rint.v32f32(<32 x float> %op) + store <32 x float> %res, <32 x float>* %a + ret void +} + +define void @frintx_v64f32(<64 x float>* %a) #0 { +; CHECK-LABEL: frintx_v64f32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <64 x float>, <64 x float>* %a + %res = call <64 x float> @llvm.rint.v64f32(<64 x float> %op) + store <64 x float> %res, <64 x float>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x double> @frintx_v1f64(<1 x double> %op) #0 { +; CHECK-LABEL: frintx_v1f64: +; CHECK: frintx d0, d0 +; CHECK-NEXT: ret + %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op) + ret <1 x double> %res +} + +; Don't use SVE for 128-bit vectors. +define <2 x double> @frintx_v2f64(<2 x double> %op) #0 { +; CHECK-LABEL: frintx_v2f64: +; CHECK: frintx v0.2d, v0.2d +; CHECK-NEXT: ret + %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op) + ret <2 x double> %res +} + +define void @frintx_v4f64(<4 x double>* %a) #0 { +; CHECK-LABEL: frintx_v4f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <4 x double>, <4 x double>* %a + %res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op) + store <4 x double> %res, <4 x double>* %a + ret void +} + +define void @frintx_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: frintx_v8f64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d +; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d +; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <8 x double>, <8 x double>* %a + %res = call <8 x double> @llvm.rint.v8f64(<8 x double> %op) + store <8 x double> %res, <8 x double>* %a + ret void +} + +define void @frintx_v16f64(<16 x double>* %a) #0 { +; CHECK-LABEL: frintx_v16f64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <16 x double>, <16 x double>* %a + %res = call <16 x double> @llvm.rint.v16f64(<16 x double> %op) + store <16 x double> %res, <16 x double>* %a + ret void +} + +define void @frintx_v32f64(<32 x double>* %a) #0 { +; CHECK-LABEL: frintx_v32f64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <32 x double>, <32 x double>* %a + %res = call <32 x double> @llvm.rint.v32f64(<32 x double> %op) + store <32 x double> %res, <32 x double>* %a + ret void +} + +; +; ROUND -> FRINTA +; + +; Don't use SVE for 64-bit vectors. +define <4 x half> @frinta_v4f16(<4 x half> %op) #0 { +; CHECK-LABEL: frinta_v4f16: +; CHECK: frinta v0.4h, v0.4h +; CHECK-NEXT: ret + %res = call <4 x half> @llvm.round.v4f16(<4 x half> %op) + ret <4 x half> %res +} + +; Don't use SVE for 128-bit vectors. +define <8 x half> @frinta_v8f16(<8 x half> %op) #0 { +; CHECK-LABEL: frinta_v8f16: +; CHECK: frinta v0.8h, v0.8h +; CHECK-NEXT: ret + %res = call <8 x half> @llvm.round.v8f16(<8 x half> %op) + ret <8 x half> %res +} + +define void @frinta_v16f16(<16 x half>* %a) #0 { +; CHECK-LABEL: frinta_v16f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <16 x half>, <16 x half>* %a + %res = call <16 x half> @llvm.round.v16f16(<16 x half> %op) + store <16 x half> %res, <16 x half>* %a + ret void +} + +define void @frinta_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: frinta_v32f16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h +; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h +; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <32 x half>, <32 x half>* %a + %res = call <32 x half> @llvm.round.v32f16(<32 x half> %op) + store <32 x half> %res, <32 x half>* %a + ret void +} + +define void @frinta_v64f16(<64 x half>* %a) #0 { +; CHECK-LABEL: frinta_v64f16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <64 x half>, <64 x half>* %a + %res = call <64 x half> @llvm.round.v64f16(<64 x half> %op) + store <64 x half> %res, <64 x half>* %a + ret void +} + +define void @frinta_v128f16(<128 x half>* %a) #0 { +; CHECK-LABEL: frinta_v128f16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <128 x half>, <128 x half>* %a + %res = call <128 x half> @llvm.round.v128f16(<128 x half> %op) + store <128 x half> %res, <128 x half>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x float> @frinta_v2f32(<2 x float> %op) #0 { +; CHECK-LABEL: frinta_v2f32: +; CHECK: frinta v0.2s, v0.2s +; CHECK-NEXT: ret + %res = call <2 x float> @llvm.round.v2f32(<2 x float> %op) + ret <2 x float> %res +} + +; Don't use SVE for 128-bit vectors. +define <4 x float> @frinta_v4f32(<4 x float> %op) #0 { +; CHECK-LABEL: frinta_v4f32: +; CHECK: frinta v0.4s, v0.4s +; CHECK-NEXT: ret + %res = call <4 x float> @llvm.round.v4f32(<4 x float> %op) + ret <4 x float> %res +} + +define void @frinta_v8f32(<8 x float>* %a) #0 { +; CHECK-LABEL: frinta_v8f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <8 x float>, <8 x float>* %a + %res = call <8 x float> @llvm.round.v8f32(<8 x float> %op) + store <8 x float> %res, <8 x float>* %a + ret void +} + +define void @frinta_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: frinta_v16f32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s +; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s +; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <16 x float>, <16 x float>* %a + %res = call <16 x float> @llvm.round.v16f32(<16 x float> %op) + store <16 x float> %res, <16 x float>* %a + ret void +} + +define void @frinta_v32f32(<32 x float>* %a) #0 { +; CHECK-LABEL: frinta_v32f32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <32 x float>, <32 x float>* %a + %res = call <32 x float> @llvm.round.v32f32(<32 x float> %op) + store <32 x float> %res, <32 x float>* %a + ret void +} + +define void @frinta_v64f32(<64 x float>* %a) #0 { +; CHECK-LABEL: frinta_v64f32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <64 x float>, <64 x float>* %a + %res = call <64 x float> @llvm.round.v64f32(<64 x float> %op) + store <64 x float> %res, <64 x float>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x double> @frinta_v1f64(<1 x double> %op) #0 { +; CHECK-LABEL: frinta_v1f64: +; CHECK: frinta d0, d0 +; CHECK-NEXT: ret + %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op) + ret <1 x double> %res +} + +; Don't use SVE for 128-bit vectors. +define <2 x double> @frinta_v2f64(<2 x double> %op) #0 { +; CHECK-LABEL: frinta_v2f64: +; CHECK: frinta v0.2d, v0.2d +; CHECK-NEXT: ret + %res = call <2 x double> @llvm.round.v2f64(<2 x double> %op) + ret <2 x double> %res +} + +define void @frinta_v4f64(<4 x double>* %a) #0 { +; CHECK-LABEL: frinta_v4f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <4 x double>, <4 x double>* %a + %res = call <4 x double> @llvm.round.v4f64(<4 x double> %op) + store <4 x double> %res, <4 x double>* %a + ret void +} + +define void @frinta_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: frinta_v8f64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d +; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d +; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <8 x double>, <8 x double>* %a + %res = call <8 x double> @llvm.round.v8f64(<8 x double> %op) + store <8 x double> %res, <8 x double>* %a + ret void +} + +define void @frinta_v16f64(<16 x double>* %a) #0 { +; CHECK-LABEL: frinta_v16f64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <16 x double>, <16 x double>* %a + %res = call <16 x double> @llvm.round.v16f64(<16 x double> %op) + store <16 x double> %res, <16 x double>* %a + ret void +} + +define void @frinta_v32f64(<32 x double>* %a) #0 { +; CHECK-LABEL: frinta_v32f64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <32 x double>, <32 x double>* %a + %res = call <32 x double> @llvm.round.v32f64(<32 x double> %op) + store <32 x double> %res, <32 x double>* %a + ret void +} + +; +; TRUNC -> FRINTZ +; + +; Don't use SVE for 64-bit vectors. +define <4 x half> @frintz_v4f16(<4 x half> %op) #0 { +; CHECK-LABEL: frintz_v4f16: +; CHECK: frintz v0.4h, v0.4h +; CHECK-NEXT: ret + %res = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op) + ret <4 x half> %res +} + +; Don't use SVE for 128-bit vectors. +define <8 x half> @frintz_v8f16(<8 x half> %op) #0 { +; CHECK-LABEL: frintz_v8f16: +; CHECK: frintz v0.8h, v0.8h +; CHECK-NEXT: ret + %res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op) + ret <8 x half> %res +} + +define void @frintz_v16f16(<16 x half>* %a) #0 { +; CHECK-LABEL: frintz_v16f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <16 x half>, <16 x half>* %a + %res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op) + store <16 x half> %res, <16 x half>* %a + ret void +} + +define void @frintz_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: frintz_v32f16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h +; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h +; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <32 x half>, <32 x half>* %a + %res = call <32 x half> @llvm.trunc.v32f16(<32 x half> %op) + store <32 x half> %res, <32 x half>* %a + ret void +} + +define void @frintz_v64f16(<64 x half>* %a) #0 { +; CHECK-LABEL: frintz_v64f16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <64 x half>, <64 x half>* %a + %res = call <64 x half> @llvm.trunc.v64f16(<64 x half> %op) + store <64 x half> %res, <64 x half>* %a + ret void +} + +define void @frintz_v128f16(<128 x half>* %a) #0 { +; CHECK-LABEL: frintz_v128f16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <128 x half>, <128 x half>* %a + %res = call <128 x half> @llvm.trunc.v128f16(<128 x half> %op) + store <128 x half> %res, <128 x half>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x float> @frintz_v2f32(<2 x float> %op) #0 { +; CHECK-LABEL: frintz_v2f32: +; CHECK: frintz v0.2s, v0.2s +; CHECK-NEXT: ret + %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op) + ret <2 x float> %res +} + +; Don't use SVE for 128-bit vectors. +define <4 x float> @frintz_v4f32(<4 x float> %op) #0 { +; CHECK-LABEL: frintz_v4f32: +; CHECK: frintz v0.4s, v0.4s +; CHECK-NEXT: ret + %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op) + ret <4 x float> %res +} + +define void @frintz_v8f32(<8 x float>* %a) #0 { +; CHECK-LABEL: frintz_v8f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <8 x float>, <8 x float>* %a + %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op) + store <8 x float> %res, <8 x float>* %a + ret void +} + +define void @frintz_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: frintz_v16f32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s +; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s +; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <16 x float>, <16 x float>* %a + %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op) + store <16 x float> %res, <16 x float>* %a + ret void +} + +define void @frintz_v32f32(<32 x float>* %a) #0 { +; CHECK-LABEL: frintz_v32f32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <32 x float>, <32 x float>* %a + %res = call <32 x float> @llvm.trunc.v32f32(<32 x float> %op) + store <32 x float> %res, <32 x float>* %a + ret void +} + +define void @frintz_v64f32(<64 x float>* %a) #0 { +; CHECK-LABEL: frintz_v64f32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <64 x float>, <64 x float>* %a + %res = call <64 x float> @llvm.trunc.v64f32(<64 x float> %op) + store <64 x float> %res, <64 x float>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x double> @frintz_v1f64(<1 x double> %op) #0 { +; CHECK-LABEL: frintz_v1f64: +; CHECK: frintz d0, d0 +; CHECK-NEXT: ret + %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op) + ret <1 x double> %res +} + +; Don't use SVE for 128-bit vectors. +define <2 x double> @frintz_v2f64(<2 x double> %op) #0 { +; CHECK-LABEL: frintz_v2f64: +; CHECK: frintz v0.2d, v0.2d +; CHECK-NEXT: ret + %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op) + ret <2 x double> %res +} + +define void @frintz_v4f64(<4 x double>* %a) #0 { +; CHECK-LABEL: frintz_v4f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <4 x double>, <4 x double>* %a + %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op) + store <4 x double> %res, <4 x double>* %a + ret void +} + +define void @frintz_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: frintz_v8f64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d +; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d +; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <8 x double>, <8 x double>* %a + %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op) + store <8 x double> %res, <8 x double>* %a + ret void +} + +define void @frintz_v16f64(<16 x double>* %a) #0 { +; CHECK-LABEL: frintz_v16f64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <16 x double>, <16 x double>* %a + %res = call <16 x double> @llvm.trunc.v16f64(<16 x double> %op) + store <16 x double> %res, <16 x double>* %a + ret void +} + +define void @frintz_v32f64(<32 x double>* %a) #0 { +; CHECK-LABEL: frintz_v32f64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <32 x double>, <32 x double>* %a + %res = call <32 x double> @llvm.trunc.v32f64(<32 x double> %op) + store <32 x double> %res, <32 x double>* %a + ret void +} + +attributes #0 = { "target-features"="+sve" } + +declare <4 x half> @llvm.ceil.v4f16(<4 x half>) +declare <8 x half> @llvm.ceil.v8f16(<8 x half>) +declare <16 x half> @llvm.ceil.v16f16(<16 x half>) +declare <32 x half> @llvm.ceil.v32f16(<32 x half>) +declare <64 x half> @llvm.ceil.v64f16(<64 x half>) +declare <128 x half> @llvm.ceil.v128f16(<128 x half>) +declare <2 x float> @llvm.ceil.v2f32(<2 x float>) +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) +declare <16 x float> @llvm.ceil.v16f32(<16 x float>) +declare <32 x float> @llvm.ceil.v32f32(<32 x float>) +declare <64 x float> @llvm.ceil.v64f32(<64 x float>) +declare <1 x double> @llvm.ceil.v1f64(<1 x double>) +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) +declare <16 x double> @llvm.ceil.v16f64(<16 x double>) +declare <32 x double> @llvm.ceil.v32f64(<32 x double>) + +declare <4 x half> @llvm.floor.v4f16(<4 x half>) +declare <8 x half> @llvm.floor.v8f16(<8 x half>) +declare <16 x half> @llvm.floor.v16f16(<16 x half>) +declare <32 x half> @llvm.floor.v32f16(<32 x half>) +declare <64 x half> @llvm.floor.v64f16(<64 x half>) +declare <128 x half> @llvm.floor.v128f16(<128 x half>) +declare <2 x float> @llvm.floor.v2f32(<2 x float>) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) +declare <8 x float> @llvm.floor.v8f32(<8 x float>) +declare <16 x float> @llvm.floor.v16f32(<16 x float>) +declare <32 x float> @llvm.floor.v32f32(<32 x float>) +declare <64 x float> @llvm.floor.v64f32(<64 x float>) +declare <1 x double> @llvm.floor.v1f64(<1 x double>) +declare <2 x double> @llvm.floor.v2f64(<2 x double>) +declare <4 x double> @llvm.floor.v4f64(<4 x double>) +declare <8 x double> 
@llvm.floor.v8f64(<8 x double>) +declare <16 x double> @llvm.floor.v16f64(<16 x double>) +declare <32 x double> @llvm.floor.v32f64(<32 x double>) + +declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>) +declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>) +declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>) +declare <32 x half> @llvm.nearbyint.v32f16(<32 x half>) +declare <64 x half> @llvm.nearbyint.v64f16(<64 x half>) +declare <128 x half> @llvm.nearbyint.v128f16(<128 x half>) +declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) +declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>) +declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>) +declare <32 x float> @llvm.nearbyint.v32f32(<32 x float>) +declare <64 x float> @llvm.nearbyint.v64f32(<64 x float>) +declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>) +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) +declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) +declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>) +declare <16 x double> @llvm.nearbyint.v16f64(<16 x double>) +declare <32 x double> @llvm.nearbyint.v32f64(<32 x double>) + +declare <4 x half> @llvm.rint.v4f16(<4 x half>) +declare <8 x half> @llvm.rint.v8f16(<8 x half>) +declare <16 x half> @llvm.rint.v16f16(<16 x half>) +declare <32 x half> @llvm.rint.v32f16(<32 x half>) +declare <64 x half> @llvm.rint.v64f16(<64 x half>) +declare <128 x half> @llvm.rint.v128f16(<128 x half>) +declare <2 x float> @llvm.rint.v2f32(<2 x float>) +declare <4 x float> @llvm.rint.v4f32(<4 x float>) +declare <8 x float> @llvm.rint.v8f32(<8 x float>) +declare <16 x float> @llvm.rint.v16f32(<16 x float>) +declare <32 x float> @llvm.rint.v32f32(<32 x float>) +declare <64 x float> @llvm.rint.v64f32(<64 x float>) +declare <1 x double> @llvm.rint.v1f64(<1 x double>) +declare <2 x double> @llvm.rint.v2f64(<2 x double>) +declare <4 x double> @llvm.rint.v4f64(<4 x double>) +declare <8 x double> @llvm.rint.v8f64(<8 x double>) +declare <16 x double> @llvm.rint.v16f64(<16 x double>) +declare <32 x double> @llvm.rint.v32f64(<32 x double>) + +declare <4 x half> @llvm.round.v4f16(<4 x half>) +declare <8 x half> @llvm.round.v8f16(<8 x half>) +declare <16 x half> @llvm.round.v16f16(<16 x half>) +declare <32 x half> @llvm.round.v32f16(<32 x half>) +declare <64 x half> @llvm.round.v64f16(<64 x half>) +declare <128 x half> @llvm.round.v128f16(<128 x half>) +declare <2 x float> @llvm.round.v2f32(<2 x float>) +declare <4 x float> @llvm.round.v4f32(<4 x float>) +declare <8 x float> @llvm.round.v8f32(<8 x float>) +declare <16 x float> @llvm.round.v16f32(<16 x float>) +declare <32 x float> @llvm.round.v32f32(<32 x float>) +declare <64 x float> @llvm.round.v64f32(<64 x float>) +declare <1 x double> @llvm.round.v1f64(<1 x double>) +declare <2 x double> @llvm.round.v2f64(<2 x double>) +declare <4 x double> @llvm.round.v4f64(<4 x double>) +declare <8 x double> @llvm.round.v8f64(<8 x double>) +declare <16 x double> @llvm.round.v16f64(<16 x double>) +declare <32 x double> @llvm.round.v32f64(<32 x double>) + +declare <4 x half> @llvm.trunc.v4f16(<4 x half>) +declare <8 x half> @llvm.trunc.v8f16(<8 x half>) +declare <16 x half> @llvm.trunc.v16f16(<16 x half>) +declare <32 x half> @llvm.trunc.v32f16(<32 x half>) +declare <64 x half> @llvm.trunc.v64f16(<64 x half>) +declare <128 x half> @llvm.trunc.v128f16(<128 x half>) +declare <2 x float> @llvm.trunc.v2f32(<2 x float>) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) +declare <8 x float> 
@llvm.trunc.v8f32(<8 x float>) +declare <16 x float> @llvm.trunc.v16f32(<16 x float>) +declare <32 x float> @llvm.trunc.v32f32(<32 x float>) +declare <64 x float> @llvm.trunc.v64f32(<64 x float>) +declare <1 x double> @llvm.trunc.v1f64(<1 x double>) +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) +declare <8 x double> @llvm.trunc.v8f64(<8 x double>) +declare <16 x double> @llvm.trunc.v16f64(<16 x double>) +declare <32 x double> @llvm.trunc.v32f64(<32 x double>) From 27f3d51b4ef9bf4dc9cfeed88f66fcfc7ee99ee9 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 1 Oct 2020 18:08:33 +0100 Subject: [PATCH 121/321] [SVE] Lower fixed length vector fneg and fsqrt operations. Also updates sve-fp.ll to use fneg directly. Differential Revision: https://reviews.llvm.org/D88683 --- .../Target/AArch64/AArch64ISelLowering.cpp | 2 + .../AArch64/sve-fixed-length-fp-arith.ll | 447 ++++++++++++++++++ llvm/test/CodeGen/AArch64/sve-fp.ll | 36 +- 3 files changed, 461 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d76918d8ace4a..308628a7f8348 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1234,8 +1234,10 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::FMINNUM, VT, Custom); setOperationAction(ISD::FMUL, VT, Custom); setOperationAction(ISD::FNEARBYINT, VT, Custom); + setOperationAction(ISD::FNEG, VT, Custom); setOperationAction(ISD::FRINT, VT, Custom); setOperationAction(ISD::FROUND, VT, Custom); + setOperationAction(ISD::FSQRT, VT, Custom); setOperationAction(ISD::FSUB, VT, Custom); setOperationAction(ISD::FTRUNC, VT, Custom); setOperationAction(ISD::LOAD, VT, Custom); diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll index f8a37e7d81fe0..1806b4945ec91 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll @@ -1044,6 +1044,434 @@ define void @fmul_v32f64(<32 x double>* %a, <32 x double>* %b) #0 { ret void } +; +; FNEG +; + +; Don't use SVE for 64-bit vectors. +define <4 x half> @fneg_v4f16(<4 x half> %op) #0 { +; CHECK-LABEL: fneg_v4f16: +; CHECK: fneg v0.4h, v0.4h +; CHECK: ret + %res = fneg <4 x half> %op + ret <4 x half> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <8 x half> @fneg_v8f16(<8 x half> %op) #0 { +; CHECK-LABEL: fneg_v8f16: +; CHECK: fneg v0.8h, v0.8h +; CHECK: ret + %res = fneg <8 x half> %op + ret <8 x half> %res +} + +define void @fneg_v16f16(<16 x half>* %a, <16 x half>* %b) #0 { +; CHECK-LABEL: fneg_v16f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),16)]] +; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op = load <16 x half>, <16 x half>* %a + %res = fneg <16 x half> %op + store <16 x half> %res, <16 x half>* %a + ret void +} + +define void @fneg_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: fneg_v32f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),32)]] +; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op = load <32 x half>, <32 x half>* %a + %res = fneg <32 x half> %op + store <32 x half> %res, <32 x half>* %a + ret void +} + +define void @fneg_v64f16(<64 x half>* %a) #0 { +; CHECK-LABEL: fneg_v64f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),64)]] +; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op = load <64 x half>, <64 x half>* %a + %res = fneg <64 x half> %op + store <64 x half> %res, <64 x half>* %a + ret void +} + +define void @fneg_v128f16(<128 x half>* %a) #0 { +; CHECK-LABEL: fneg_v128f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),128)]] +; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op = load <128 x half>, <128 x half>* %a + %res = fneg <128 x half> %op + store <128 x half> %res, <128 x half>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x float> @fneg_v2f32(<2 x float> %op) #0 { +; CHECK-LABEL: fneg_v2f32: +; CHECK: fneg v0.2s, v0.2s +; CHECK: ret + %res = fneg <2 x float> %op + ret <2 x float> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <4 x float> @fneg_v4f32(<4 x float> %op) #0 { +; CHECK-LABEL: fneg_v4f32: +; CHECK: fneg v0.4s, v0.4s +; CHECK: ret + %res = fneg <4 x float> %op + ret <4 x float> %res +} + +define void @fneg_v8f32(<8 x float>* %a) #0 { +; CHECK-LABEL: fneg_v8f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]] +; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op = load <8 x float>, <8 x float>* %a + %res = fneg <8 x float> %op + store <8 x float> %res, <8 x float>* %a + ret void +} + +define void @fneg_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: fneg_v16f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]] +; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op = load <16 x float>, <16 x float>* %a + %res = fneg <16 x float> %op + store <16 x float> %res, <16 x float>* %a + ret void +} + +define void @fneg_v32f32(<32 x float>* %a) #0 { +; CHECK-LABEL: fneg_v32f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]] +; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op = load <32 x float>, <32 x float>* %a + %res = fneg <32 x float> %op + store <32 x float> %res, <32 x float>* %a + ret void +} + +define void @fneg_v64f32(<64 x float>* %a) #0 { +; CHECK-LABEL: fneg_v64f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]] +; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op = load <64 x float>, <64 x float>* %a + %res = fneg <64 x float> %op + store <64 x float> %res, <64 x float>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x double> @fneg_v1f64(<1 x double> %op) #0 { +; CHECK-LABEL: fneg_v1f64: +; CHECK: fneg d0, d0 +; CHECK: ret + %res = fneg <1 x double> %op + ret <1 x double> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <2 x double> @fneg_v2f64(<2 x double> %op) #0 { +; CHECK-LABEL: fneg_v2f64: +; CHECK: fneg v0.2d, v0.2d +; CHECK: ret + %res = fneg <2 x double> %op + ret <2 x double> %res +} + +define void @fneg_v4f64(<4 x double>* %a) #0 { +; CHECK-LABEL: fneg_v4f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),4)]] +; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op = load <4 x double>, <4 x double>* %a + %res = fneg <4 x double> %op + store <4 x double> %res, <4 x double>* %a + ret void +} + +define void @fneg_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: fneg_v8f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),8)]] +; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op = load <8 x double>, <8 x double>* %a + %res = fneg <8 x double> %op + store <8 x double> %res, <8 x double>* %a + ret void +} + +define void @fneg_v16f64(<16 x double>* %a) #0 { +; CHECK-LABEL: fneg_v16f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),16)]] +; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op = load <16 x double>, <16 x double>* %a + %res = fneg <16 x double> %op + store <16 x double> %res, <16 x double>* %a + ret void +} + +define void @fneg_v32f64(<32 x double>* %a) #0 { +; CHECK-LABEL: fneg_v32f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),32)]] +; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK: fneg [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op = load <32 x double>, <32 x double>* %a + %res = fneg <32 x double> %op + store <32 x double> %res, <32 x double>* %a + ret void +} + +; +; FSQRT +; + +; Don't use SVE for 64-bit vectors. +define <4 x half> @fsqrt_v4f16(<4 x half> %op) #0 { +; CHECK-LABEL: fsqrt_v4f16: +; CHECK: fsqrt v0.4h, v0.4h +; CHECK: ret + %res = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %op) + ret <4 x half> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <8 x half> @fsqrt_v8f16(<8 x half> %op) #0 { +; CHECK-LABEL: fsqrt_v8f16: +; CHECK: fsqrt v0.8h, v0.8h +; CHECK: ret + %res = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %op) + ret <8 x half> %res +} + +define void @fsqrt_v16f16(<16 x half>* %a, <16 x half>* %b) #0 { +; CHECK-LABEL: fsqrt_v16f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),16)]] +; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op = load <16 x half>, <16 x half>* %a + %res = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %op) + store <16 x half> %res, <16 x half>* %a + ret void +} + +define void @fsqrt_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: fsqrt_v32f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),32)]] +; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op = load <32 x half>, <32 x half>* %a + %res = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %op) + store <32 x half> %res, <32 x half>* %a + ret void +} + +define void @fsqrt_v64f16(<64 x half>* %a) #0 { +; CHECK-LABEL: fsqrt_v64f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),64)]] +; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op = load <64 x half>, <64 x half>* %a + %res = call <64 x half> @llvm.sqrt.v64f16(<64 x half> %op) + store <64 x half> %res, <64 x half>* %a + ret void +} + +define void @fsqrt_v128f16(<128 x half>* %a) #0 { +; CHECK-LABEL: fsqrt_v128f16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),128)]] +; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op = load <128 x half>, <128 x half>* %a + %res = call <128 x half> @llvm.sqrt.v128f16(<128 x half> %op) + store <128 x half> %res, <128 x half>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x float> @fsqrt_v2f32(<2 x float> %op) #0 { +; CHECK-LABEL: fsqrt_v2f32: +; CHECK: fsqrt v0.2s, v0.2s +; CHECK: ret + %res = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %op) + ret <2 x float> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <4 x float> @fsqrt_v4f32(<4 x float> %op) #0 { +; CHECK-LABEL: fsqrt_v4f32: +; CHECK: fsqrt v0.4s, v0.4s +; CHECK: ret + %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %op) + ret <4 x float> %res +} + +define void @fsqrt_v8f32(<8 x float>* %a) #0 { +; CHECK-LABEL: fsqrt_v8f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]] +; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op = load <8 x float>, <8 x float>* %a + %res = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %op) + store <8 x float> %res, <8 x float>* %a + ret void +} + +define void @fsqrt_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: fsqrt_v16f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]] +; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op = load <16 x float>, <16 x float>* %a + %res = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %op) + store <16 x float> %res, <16 x float>* %a + ret void +} + +define void @fsqrt_v32f32(<32 x float>* %a) #0 { +; CHECK-LABEL: fsqrt_v32f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]] +; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op = load <32 x float>, <32 x float>* %a + %res = call <32 x float> @llvm.sqrt.v32f32(<32 x float> %op) + store <32 x float> %res, <32 x float>* %a + ret void +} + +define void @fsqrt_v64f32(<64 x float>* %a) #0 { +; CHECK-LABEL: fsqrt_v64f32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]] +; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op = load <64 x float>, <64 x float>* %a + %res = call <64 x float> @llvm.sqrt.v64f32(<64 x float> %op) + store <64 x float> %res, <64 x float>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x double> @fsqrt_v1f64(<1 x double> %op) #0 { +; CHECK-LABEL: fsqrt_v1f64: +; CHECK: fsqrt d0, d0 +; CHECK: ret + %res = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %op) + ret <1 x double> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <2 x double> @fsqrt_v2f64(<2 x double> %op) #0 { +; CHECK-LABEL: fsqrt_v2f64: +; CHECK: fsqrt v0.2d, v0.2d +; CHECK: ret + %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %op) + ret <2 x double> %res +} + +define void @fsqrt_v4f64(<4 x double>* %a) #0 { +; CHECK-LABEL: fsqrt_v4f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),4)]] +; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op = load <4 x double>, <4 x double>* %a + %res = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %op) + store <4 x double> %res, <4 x double>* %a + ret void +} + +define void @fsqrt_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: fsqrt_v8f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),8)]] +; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op = load <8 x double>, <8 x double>* %a + %res = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %op) + store <8 x double> %res, <8 x double>* %a + ret void +} + +define void @fsqrt_v16f64(<16 x double>* %a) #0 { +; CHECK-LABEL: fsqrt_v16f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),16)]] +; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op = load <16 x double>, <16 x double>* %a + %res = call <16 x double> @llvm.sqrt.v16f64(<16 x double> %op) + store <16 x double> %res, <16 x double>* %a + ret void +} + +define void @fsqrt_v32f64(<32 x double>* %a) #0 { +; CHECK-LABEL: fsqrt_v32f64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),32)]] +; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK: fsqrt [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op = load <32 x double>, <32 x double>* %a + %res = call <32 x double> @llvm.sqrt.v32f64(<32 x double> %op) + store <32 x double> %res, <32 x double>* %a + ret void +} + ; ; FSUB ; @@ -1302,3 +1730,22 @@ declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) declare <16 x double> @llvm.fma.v16f64(<16 x double>, <16 x double>, <16 x double>) declare <32 x double> @llvm.fma.v32f64(<32 x double>, <32 x double>, <32 x double>) + +declare <4 x half> @llvm.sqrt.v4f16(<4 x half>) +declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) +declare <16 x half> @llvm.sqrt.v16f16(<16 x half>) +declare <32 x half> @llvm.sqrt.v32f16(<32 x half>) +declare <64 x half> @llvm.sqrt.v64f16(<64 x half>) +declare <128 x half> @llvm.sqrt.v128f16(<128 x half>) +declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) +declare <32 x float> @llvm.sqrt.v32f32(<32 x float>) +declare <64 x float> @llvm.sqrt.v64f32(<64 x float>) +declare <1 x double> @llvm.sqrt.v1f64(<1 x double>) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) +declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) +declare <16 x double> @llvm.sqrt.v16f64(<16 x double>) +declare <32 x double> @llvm.sqrt.v32f64(<32 x double>) diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll index 7ca1fdee7f32f..a54412cee9bf0 
100644
--- a/llvm/test/CodeGen/AArch64/sve-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp.ll
@@ -329,10 +329,8 @@ define <vscale x 8 x half> @fneg_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
 ; CHECK-NEXT: ret
-  %minus.one = insertelement <vscale x 8 x half> undef, half -1.0, i64 0
-  %minus.one.vec = shufflevector <vscale x 8 x half> %minus.one, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
-  %neg = fmul <vscale x 8 x half> %a, %minus.one.vec
-  ret <vscale x 8 x half> %neg
+  %res = fneg <vscale x 8 x half> %a
+  ret <vscale x 8 x half> %res
 }
 
 define <vscale x 4 x half> @fneg_nxv4f16(<vscale x 4 x half> %a) {
@@ -341,10 +339,8 @@ define <vscale x 4 x half> @fneg_nxv4f16(<vscale x 4 x half> %a) {
 ; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
 ; CHECK-NEXT: ret
-  %minus.one = insertelement <vscale x 4 x half> undef, half -1.0, i64 0
-  %minus.one.vec = shufflevector <vscale x 4 x half> %minus.one, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
-  %neg = fmul <vscale x 4 x half> %a, %minus.one.vec
-  ret <vscale x 4 x half> %neg
+  %res = fneg <vscale x 4 x half> %a
+  ret <vscale x 4 x half> %res
 }
 
 define <vscale x 2 x half> @fneg_nxv2f16(<vscale x 2 x half> %a) {
@@ -353,10 +349,8 @@ define <vscale x 2 x half> @fneg_nxv2f16(<vscale x 2 x half> %a) {
 ; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
 ; CHECK-NEXT: ret
-  %minus.one = insertelement <vscale x 2 x half> undef, half -1.0, i64 0
-  %minus.one.vec = shufflevector <vscale x 2 x half> %minus.one, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
-  %neg = fmul <vscale x 2 x half> %a, %minus.one.vec
-  ret <vscale x 2 x half> %neg
+  %res = fneg <vscale x 2 x half> %a
+  ret <vscale x 2 x half> %res
 }
 
 define <vscale x 4 x float> @fneg_nxv4f32(<vscale x 4 x float> %a) {
@@ -365,10 +359,8 @@ define <vscale x 4 x float> @fneg_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: fneg z0.s, p0/m, z0.s
 ; CHECK-NEXT: ret
-  %minus.one = insertelement <vscale x 4 x float> undef, float -1.0, i64 0
-  %minus.one.vec = shufflevector <vscale x 4 x float> %minus.one, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
-  %neg = fmul <vscale x 4 x float> %a, %minus.one.vec
-  ret <vscale x 4 x float> %neg
+  %res = fneg <vscale x 4 x float> %a
+  ret <vscale x 4 x float> %res
 }
 
 define <vscale x 2 x float> @fneg_nxv2f32(<vscale x 2 x float> %a) {
@@ -377,10 +369,8 @@ define <vscale x 2 x float> @fneg_nxv2f32(<vscale x 2 x float> %a) {
 ; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: fneg z0.s, p0/m, z0.s
 ; CHECK-NEXT: ret
-  %minus.one = insertelement <vscale x 2 x float> undef, float -1.0, i64 0
-  %minus.one.vec = shufflevector <vscale x 2 x float> %minus.one, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
-  %neg = fmul <vscale x 2 x float> %a, %minus.one.vec
-  ret <vscale x 2 x float> %neg
+  %res = fneg <vscale x 2 x float> %a
+  ret <vscale x 2 x float> %res
 }
 
 define <vscale x 2 x double> @fneg_nxv2f64(<vscale x 2 x double> %a) {
@@ -389,10 +379,8 @@ define <vscale x 2 x double> @fneg_nxv2f64(<vscale x 2 x double> %a) {
 ; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: fneg z0.d, p0/m, z0.d
 ; CHECK-NEXT: ret
-  %minus.one = insertelement <vscale x 2 x double> undef, double -1.0, i64 0
-  %minus.one.vec = shufflevector <vscale x 2 x double> %minus.one, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
-  %neg = fmul <vscale x 2 x double> %a, %minus.one.vec
-  ret <vscale x 2 x double> %neg
+  %res = fneg <vscale x 2 x double> %a
+  ret <vscale x 2 x double> %res
 }
 
 define <vscale x 8 x half> @frecps_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {

From 3cb1220709fa556d4d29ce0e25fd30a16895ae24 Mon Sep 17 00:00:00 2001
From: Sam McCall
Date: Mon, 5 Oct 2020 21:41:35 +0200
Subject: [PATCH 122/321] [clangd] Add `score` extension to workspace/symbol response.

The protocol doesn't really incorporate ranking.
As with code completion, most clients respect what the server sends,
but VSCode re-ranks items, with predictable results.
See https://github.com/clangd/vscode-clangd/issues/81

There's no filterText field so we may be unable to construct a good
workaround. But expose the score so we may be able to do this on the
client in future.
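(Illustrative aside, not part of this patch: a minimal sketch of the client-side re-ranking the `score` extension enables, assuming symbols parsed from a workspace/symbol JSON response. The names fuzzy_match and rerank are made up, and the matcher is a crude placeholder for whatever a real client uses.)

def fuzzy_match(query, name):
    # Placeholder: a trivial prefix heuristic standing in for a real fuzzy matcher.
    return 1.0 if name.lower().startswith(query.lower()) else 0.5

def rerank(symbols, query):
    # clangd's `score` deliberately excludes the fuzzy-match component,
    # so the client multiplies its own match quality back in before sorting.
    return sorted(symbols,
                  key=lambda s: s.get("score", 1.0) * fuzzy_match(query, s["name"]),
                  reverse=True)

symbols = [{"name": "vector", "score": 2.5}, {"name": "vectorize", "score": 1.2}]
print([s["name"] for s in rerank(symbols, "vec")])  # ['vector', 'vectorize']

Multiplying, rather than replacing the server score, keeps the server's quality and relevance signals while letting the client own the name-match component.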
Differential Revision: https://reviews.llvm.org/D88844
---
 clang-tools-extra/clangd/FindSymbols.cpp   | 15 +++++++++------
 clang-tools-extra/clangd/Protocol.cpp      |  5 ++++-
 clang-tools-extra/clangd/Protocol.h        | 18 +++++++++++++-----
 clang-tools-extra/clangd/test/symbols.test |  3 ++-
 4 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp
index e86c01d4076eb..8e21ae22dcd92 100644
--- a/clang-tools-extra/clangd/FindSymbols.cpp
+++ b/clang-tools-extra/clangd/FindSymbols.cpp
@@ -96,12 +96,13 @@ getWorkspaceSymbols(llvm::StringRef Query, int Limit,
       return;
     }
 
-    SymbolKind SK = indexSymbolKindToSymbolKind(Sym.SymInfo.Kind);
-    std::string Scope = std::string(Sym.Scope);
-    llvm::StringRef ScopeRef = Scope;
-    ScopeRef.consume_back("::");
-    SymbolInformation Info = {(Sym.Name + Sym.TemplateSpecializationArgs).str(),
-                              SK, *Loc, std::string(ScopeRef)};
+    llvm::StringRef Scope = Sym.Scope;
+    Scope.consume_back("::");
+    SymbolInformation Info;
+    Info.name = (Sym.Name + Sym.TemplateSpecializationArgs).str();
+    Info.kind = indexSymbolKindToSymbolKind(Sym.SymInfo.Kind);
+    Info.location = *Loc;
+    Info.containerName = Scope.str();
 
     SymbolQualitySignals Quality;
     Quality.merge(Sym);
@@ -121,6 +122,8 @@ getWorkspaceSymbols(llvm::StringRef Query, int Limit,
     dlog("FindSymbols: {0}{1} = {2}\n{3}{4}\n", Sym.Scope, Sym.Name, Score,
          Quality, Relevance);
 
+    // Exposed score excludes fuzzy-match component, for client-side re-ranking.
+    Info.score = Score / Relevance.NameMatch;
     Top.push({Score, std::move(Info)});
   });
   for (auto &R : std::move(Top).items())
diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp
index 61a691f2048f0..5d50a7bea0349 100644
--- a/clang-tools-extra/clangd/Protocol.cpp
+++ b/clang-tools-extra/clangd/Protocol.cpp
@@ -662,12 +662,15 @@ bool fromJSON(const llvm::json::Value &Params, ExecuteCommandParams &R,
 }
 
 llvm::json::Value toJSON(const SymbolInformation &P) {
-  return llvm::json::Object{
+  llvm::json::Object O{
       {"name", P.name},
      {"kind", static_cast<int>(P.kind)},
      {"location", P.location},
      {"containerName", P.containerName},
   };
+  if (P.score)
+    O["score"] = *P.score;
+  return std::move(O);
 }
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &O,
diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h
index 4ef94e6e01db9..6f395ffb21c53 100644
--- a/clang-tools-extra/clangd/Protocol.h
+++ b/clang-tools-extra/clangd/Protocol.h
@@ -1015,6 +1015,14 @@ struct SymbolInformation {
 
   /// The name of the symbol containing this symbol.
   std::string containerName;
+
+  /// The score that clangd calculates to rank the returned symbols.
+  /// This excludes the fuzzy-matching score between `name` and the query.
+  /// (Specifically, the last ::-separated component).
+  /// This can be used to re-rank results as the user types, using client-side
+  /// fuzzy-matching (that score should be multiplied with this one).
+  /// This is a clangd extension, set only for workspace/symbol responses.
+  llvm::Optional<float> score;
 };
 llvm::json::Value toJSON(const SymbolInformation &);
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolInformation &);
@@ -1175,11 +1183,11 @@ struct CompletionItem {
   /// Indicates if this item is deprecated.
   bool deprecated = false;
 
-  /// This is Clangd extension.
-  /// The score that Clangd calculates to rank completion items. This score can
-  /// be used to adjust the ranking on the client side.
-  /// NOTE: This excludes fuzzy matching score which is typically calculated on
-  /// the client side.
+  /// The score that clangd calculates to rank the returned completions.
+  /// This excludes the fuzzy-match between `filterText` and the partial word.
+  /// This can be used to re-rank results as the user types, using client-side
+  /// fuzzy-matching (that score should be multiplied with this one).
+  /// This is a clangd extension.
   float score = 0.f;
 
   // TODO: Add custom commitCharacters for some of the completion items. For
diff --git a/clang-tools-extra/clangd/test/symbols.test b/clang-tools-extra/clangd/test/symbols.test
index 38c5359074e57..6ab058da88362 100644
--- a/clang-tools-extra/clangd/test/symbols.test
+++ b/clang-tools-extra/clangd/test/symbols.test
@@ -23,7 +23,8 @@
 # CHECK-NEXT:      },
 # CHECK-NEXT:      "uri": "file://{{.*}}/vector.h"
 # CHECK-NEXT:    },
-# CHECK-NEXT:    "name": "vector"
+# CHECK-NEXT:    "name": "vector",
+# CHECK-NEXT:    "score": {{.*}}
 # CHECK-NEXT:  }
 # CHECK-NEXT:]
 # CHECK-NEXT:}

From f0a78bdfdc6d56b25e0081884580b3960a3c2429 Mon Sep 17 00:00:00 2001
From: David Spickett
Date: Mon, 21 Sep 2020 11:29:06 +0100
Subject: [PATCH 123/321] [AArch64] Correct parameter type for unsigned Neon
 scalar shift intrinsics

In the following intrinsics the shift amount (parameter 2) should be
signed.

  vqshlb_u8
  vqshlh_u16
  vqshls_u32
  vqshld_u64
  vqrshlb_u8
  vqrshlh_u16
  vqrshls_u32
  vqrshld_u64
  vshld_u64
  vrshld_u64

See https://developer.arm.com/documentation/ihi0073/latest

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D88013
---
 clang/include/clang/Basic/arm_neon.td        |  8 ++++----
 clang/test/CodeGen/aarch64-neon-intrinsics.c | 20 ++++++++++----------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 66b805addd836..feccf2e15dc0a 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -1263,13 +1263,13 @@ def VGET_LOW_A64 : NoTestOpInst<"vget_low", ".Q", "dPl", OP_LO>;
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Shift
 // Scalar Shift Left
-def SCALAR_SHL: SInst<"vshl", "111", "SlSUl">;
+def SCALAR_SHL: SInst<"vshl", "11(S1)", "SlSUl">;
 // Scalar Saturating Shift Left
-def SCALAR_QSHL: SInst<"vqshl", "111", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_QSHL: SInst<"vqshl", "11(S1)", "ScSsSiSlSUcSUsSUiSUl">;
 // Scalar Saturating Rounding Shift Left
-def SCALAR_QRSHL: SInst<"vqrshl", "111", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_QRSHL: SInst<"vqrshl", "11(S1)", "ScSsSiSlSUcSUsSUiSUl">;
 // Scalar Shift Rounding Left
-def SCALAR_RSHL: SInst<"vrshl", "111", "SlSUl">;
+def SCALAR_RSHL: SInst<"vrshl", "11(S1)", "SlSUl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Shift (Immediate)
diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c
index a24e3c7e5db7e..a56080bace0fd 100644
--- a/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -8548,7 +8548,7 @@ int64_t test_vshld_s64(int64_t a, int64_t b) {
 // CHECK-LABEL: @test_vshld_u64(
 // CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b)
 // CHECK: ret i64 [[VSHLD_U64_I]]
-uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
+uint64_t test_vshld_u64(uint64_t a, int64_t b) {
   return vshld_u64(a, b);
 }
 
@@ -8592,7 +8592,7 @@ int64_t test_vqshld_s64(int64_t a, int64_t b) {
 // CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
= call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
 // CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
 // CHECK:   ret i8 [[TMP2]]
-uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
+uint8_t test_vqshlb_u8(uint8_t a, int8_t b) {
   return vqshlb_u8(a, b);
 }
 
@@ -8602,21 +8602,21 @@ uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
 // CHECK:   [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
 // CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
 // CHECK:   ret i16 [[TMP2]]
-uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
+uint16_t test_vqshlh_u16(uint16_t a, int16_t b) {
   return vqshlh_u16(a, b);
 }
 
 // CHECK-LABEL: @test_vqshls_u32(
 // CHECK:   [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b)
 // CHECK:   ret i32 [[VQSHLS_U32_I]]
-uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
+uint32_t test_vqshls_u32(uint32_t a, int32_t b) {
   return vqshls_u32(a, b);
 }
 
 // CHECK-LABEL: @test_vqshld_u64(
 // CHECK:   [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b)
 // CHECK:   ret i64 [[VQSHLD_U64_I]]
-uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
+uint64_t test_vqshld_u64(uint64_t a, int64_t b) {
   return vqshld_u64(a, b);
 }
 
@@ -8630,7 +8630,7 @@ int64_t test_vrshld_s64(int64_t a, int64_t b) {
 // CHECK-LABEL: @test_vrshld_u64(
 // CHECK:   [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b)
 // CHECK:   ret i64 [[VRSHLD_U64_I]]
-uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
+uint64_t test_vrshld_u64(uint64_t a, int64_t b) {
   return vrshld_u64(a, b);
 }
 
@@ -8674,7 +8674,7 @@ int64_t test_vqrshld_s64(int64_t a, int64_t b) {
 // CHECK:   [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
 // CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
 // CHECK:   ret i8 [[TMP2]]
-uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
+uint8_t test_vqrshlb_u8(uint8_t a, int8_t b) {
   return vqrshlb_u8(a, b);
 }
 
@@ -8684,21 +8684,21 @@ uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
 // CHECK:   [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
 // CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
 // CHECK:   ret i16 [[TMP2]]
-uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
+uint16_t test_vqrshlh_u16(uint16_t a, int16_t b) {
   return vqrshlh_u16(a, b);
 }
 
 // CHECK-LABEL: @test_vqrshls_u32(
 // CHECK:   [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b)
 // CHECK:   ret i32 [[VQRSHLS_U32_I]]
-uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
+uint32_t test_vqrshls_u32(uint32_t a, int32_t b) {
   return vqrshls_u32(a, b);
 }
 
 // CHECK-LABEL: @test_vqrshld_u64(
 // CHECK:   [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b)
 // CHECK:   ret i64 [[VQRSHLD_U64_I]]
-uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
+uint64_t test_vqrshld_u64(uint64_t a, int64_t b) {
   return vqrshld_u64(a, b);
 }

From 04f908b9f0d637fc5ba3dd32437ffdf89623b1d8 Mon Sep 17 00:00:00 2001
From: Alex Richardson
Date: Tue, 6 Oct 2020 11:38:52 +0100
Subject: [PATCH 124/321] [libcxx][lit] Add support for custom ssh/scp flags
 in ssh.py

In our CHERI Jenkins CI we need to pass `-F <config file>` to each ssh/scp
command to set various arguments such as the localhost port, usage of
controlmaster, etc. to speed up connections to our emulated QEMU systems.
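To make the mechanics concrete, here is a small standalone sketch (not part
of the patch) of how the new flags are consumed: the flag string is tokenized
with shlex.split() and spliced into the command line ahead of the host,
mirroring the ssh()/scp() helpers added below. The host, config path and port
in the example are made up.

  import shlex

  def ssh_cmd(host, command, extra_ssh_args=None):
      # Tokenize the extra flags and splice them in before the host,
      # just like the patched ssh() helper does.
      cmd = ['ssh', '-oBatchMode=yes']
      if extra_ssh_args is not None:
          cmd.extend(shlex.split(extra_ssh_args))
      return cmd + [host, command]

  print(ssh_cmd('builder@qemu-cheri', 'uname -a',
                extra_ssh_args='-F /tmp/cheri_ssh_config -p 10022'))
  # ['ssh', '-oBatchMode=yes', '-F', '/tmp/cheri_ssh_config', '-p', '10022',
  #  'builder@qemu-cheri', 'uname -a']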
For our specific use-case I could have also added a single --ssh-config-file
argument that can be used for both the scp and ssh commands, but being able
to pass arbitrary extra flags for both commands seems more flexible.

Reviewed By: #libc, ldionne

Differential Revision: https://reviews.llvm.org/D84097
---
 libcxx/utils/ssh.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/libcxx/utils/ssh.py b/libcxx/utils/ssh.py
index 876e35460dc7d..8a30cc3865ed4 100755
--- a/libcxx/utils/ssh.py
+++ b/libcxx/utils/ssh.py
@@ -17,28 +17,41 @@
 import argparse
 import os
 import posixpath
+import shlex
 import subprocess
 import sys
 import tarfile
 import tempfile

+def ssh(args, command):
+    cmd = ['ssh', '-oBatchMode=yes']
+    if args.extra_ssh_args is not None:
+        cmd.extend(shlex.split(args.extra_ssh_args))
+    return cmd + [args.host, command]
+
+
+def scp(args, src, dst):
+    cmd = ['scp', '-q', '-oBatchMode=yes']
+    if args.extra_scp_args is not None:
+        cmd.extend(shlex.split(args.extra_scp_args))
+    return cmd + [src, '{}:{}'.format(args.host, dst)]
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--host', type=str, required=True)
     parser.add_argument('--execdir', type=str, required=True)
+    parser.add_argument('--extra-ssh-args', type=str, required=False)
+    parser.add_argument('--extra-scp-args', type=str, required=False)
     parser.add_argument('--codesign_identity', type=str, required=False, default=None)
     parser.add_argument('--env', type=str, nargs='*', required=False, default=dict())
     parser.add_argument("command", nargs=argparse.ONE_OR_MORE)
     args = parser.parse_args()
     commandLine = args.command

-    ssh = lambda command: ['ssh', '-oBatchMode=yes', args.host, command]
-    scp = lambda src, dst: ['scp', '-q', '-oBatchMode=yes', src, '{}:{}'.format(args.host, dst)]
-
     # Create a temporary directory where the test will be run.
     # That is effectively the value of %T on the remote host.
-    tmp = subprocess.check_output(ssh('mktemp -d /tmp/libcxx.XXXXXXXXXX'), universal_newlines=True).strip()
+    tmp = subprocess.check_output(ssh(args, 'mktemp -d /tmp/libcxx.XXXXXXXXXX'), universal_newlines=True).strip()

     # HACK:
     # If an argument is a file that ends in `.tmp.exe`, assume it is the name
@@ -67,7 +80,7 @@ def main():
             # the temporary file while still open doesn't work on Windows.
             tmpTar.close()
             remoteTarball = pathOnRemote(tmpTar.name)
-            subprocess.check_call(scp(tmpTar.name, remoteTarball))
+            subprocess.check_call(scp(args, tmpTar.name, remoteTarball))
         finally:
             # Make sure we close the file in case an exception happens before
             # we've closed it above -- otherwise close() is idempotent.
@@ -97,12 +110,12 @@ def main():
         remoteCommands.append(subprocess.list2cmdline(commandLine))

         # Finally, SSH to the remote host and execute all the commands.
-        rc = subprocess.call(ssh(' && '.join(remoteCommands)))
+        rc = subprocess.call(ssh(args, ' && '.join(remoteCommands)))
         return rc

     finally:
         # Make sure the temporary directory is removed when we're done.
-        subprocess.check_call(ssh('rm -r {}'.format(tmp)))
+        subprocess.check_call(ssh(args, 'rm -r {}'.format(tmp)))

 if __name__ == '__main__':

From c08d48fc2d7cced7b86043854c235394e87c4506 Mon Sep 17 00:00:00 2001
From: Denis Antrushin
Date: Sat, 5 Sep 2020 00:45:41 +0700
Subject: [PATCH 125/321] [Statepoints] Change statepoint machine instr format
 to better suit VReg lowering.

Current Statepoint MI format is this:

   STATEPOINT
   <id>, <num patch bytes>, <num call arguments>, <call target>,
   [call arguments...],
   <StackMaps::ConstantOp>, <calling convention>,
   <StackMaps::ConstantOp>, <statepoint flags>,
   <StackMaps::ConstantOp>, <num deopt args>, [deopt args...],
   <gc base/derived pairs...> <gc allocas...>

Note that GC pointers are listed in pairs <base, derived>.
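As a toy illustration of that layout (plain Python, not LLVM code; the
operand names %vr1, %d0, %d1 are invented), two derived pointers sharing one
base produce a gc section in which the base is repeated:

  def old_gc_section(pairs):
      # Old layout: one (base, derived) pair per derived pointer, flattened.
      ops = []
      for base, derived in pairs:
          ops += [base, derived]
      return ops

  print(old_gc_section([('%vr1', '%d0'), ('%vr1', '%d1')]))
  # ['%vr1', '%d0', '%vr1', '%d1']  -- the base %vr1 appears twice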
This causes base pointers to appear many times (at least twice) in the
instruction, which is bad for us when VReg lowering is ON.
The problem is that machine operand tiedness is a 1-1 relation, so
it might look like this:

  %vr2 = STATEPOINT ... %vr1, %vr1(tied-def0)

Since only one instance of %vr1 is tied, that may lead to incorrect
codegen (see PR46917 for more details), so we have to always spill
base pointers. This mostly defeats the new VReg lowering scheme.

This patch changes the statepoint instruction format so that every
gc pointer appears only once in the operand list. That way they can
all be tied. An additional set of operands is added to preserve the
base-derived relation required to build the stackmap.
The new statepoint has the following format:

   STATEPOINT
   <id>, <num patch bytes>, <num call arguments>, <call target>,
   [call arguments...],
   <StackMaps::ConstantOp>, <calling convention>,
   <StackMaps::ConstantOp>, <statepoint flags>,
   <StackMaps::ConstantOp>, <num deopt args>, [deopt args...],
   <StackMaps::ConstantOp>, <num gc pointers>, [gc pointers...],
   <StackMaps::ConstantOp>, <num gc allocas>, [gc allocas...],
   <StackMaps::ConstantOp>, <num base/derived pairs>, [base/derived indices...]

The changes are:
- every gc pointer is listed only once in a flat, length-prefixed list;
- the alloca list is prefixed with its length too;
- the alloca list is followed by a length-prefixed list of base/derived
  indices into the gc pointer list.
Note that the indices are logical (the ordinal of the pointer in the gc
pointer list), not absolute (the index of the machine operand).

Differential Revision: https://reviews.llvm.org/D87154
---
 llvm/include/llvm/CodeGen/StackMaps.h         |  32 ++++-
 .../CodeGen/FixupStatepointCallerSaved.cpp    |  49 +------
 .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp |  22 +--
 .../SelectionDAG/StatepointLowering.cpp       | 104 +++++++++-----
 llvm/lib/CodeGen/StackMaps.cpp                | 134 +++++++++++++++++-
 .../CodeGen/X86/statepoint-stack-usage.ll     |  18 +--
 llvm/test/CodeGen/X86/statepoint-vector.ll    |  10 +-
 .../CodeGen/X86/statepoint-vreg-details.ll    |  74 ++++------
 llvm/test/CodeGen/X86/statepoint-vreg.ll      |  96 ++++---------
 llvm/test/CodeGen/X86/statepoint-vreg.mir     |   2 +-
 10 files changed, 306 insertions(+), 235 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/StackMaps.h b/llvm/include/llvm/CodeGen/StackMaps.h
index 578bc0e161a64..b91c75a2db73e 100644
--- a/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/llvm/include/llvm/CodeGen/StackMaps.h
@@ -148,9 +148,13 @@ class PatchPointOpers {
 ///   <StackMaps::ConstantOp>, <calling convention>,
 ///   <StackMaps::ConstantOp>, <statepoint flags>,
 ///   <StackMaps::ConstantOp>, <num deopt args>, [deopt args...],
-///
-/// Note that the last two sets of arguments are not currently length
-/// prefixed.
+///   <StackMaps::ConstantOp>, <num gc pointer args>, [gc pointer args...],
+///   <StackMaps::ConstantOp>, <num gc allocas args>, [gc allocas args...],
+///   <StackMaps::ConstantOp>, <num entries in gc map>, [base/derived pairs]
+/// base/derived pairs in gc map are logical indices into the
+/// <gc pointer args> section.
+/// All gc pointers assigned to VRegs produce a new value (in the form of an
+/// MI Def operand) and are tied to it.
 class StatepointOpers {
   // TODO: we should change the STATEPOINT representation so that CC and
   // Flags should be part of meta operands, with args and deopt operands, and
@@ -217,6 +221,19 @@ class StatepointOpers {
   /// Return the statepoint flags.
   uint64_t getFlags() const { return MI->getOperand(getFlagsIdx()).getImm(); }

+  uint64_t getNumDeoptArgs() const {
+    return MI->getOperand(getNumDeoptArgsIdx()).getImm();
+  }
+
+  /// Get index of first GC pointer operand, or -1 if there are none.
+  int getFirstGCPtrIdx();
+
+  /// Get vector of base/derived pairs from statepoint.
+  /// Elements are indices into GC Pointer operand list (logical).
+  /// Returns number of elements in GCMap.
+  unsigned
+  getGCPointerMap(SmallVectorImpl<std::pair<unsigned, unsigned>> &GCMap);
+
 private:
   const MachineInstr *MI;
   unsigned NumDefs;
@@ -263,7 +280,7 @@ class StackMaps {

   /// Get index of next meta operand.
   /// Similar to parseOperand, but does not actually parse operand meaning.
-  static unsigned getNextMetaArgIdx(MachineInstr *MI, unsigned CurIdx);
+  static unsigned getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx);

   void reset() {
     CSInfos.clear();
@@ -337,6 +354,13 @@ class StackMaps {
                  MachineInstr::const_mop_iterator MOE, LocationVec &Locs,
                  LiveOutVec &LiveOuts) const;

+  /// Specialized parser of statepoint operands.
+  /// They do not directly correspond to StackMap record entries.
+  void parseStatepointOpers(const MachineInstr &MI,
+                            MachineInstr::const_mop_iterator MOI,
+                            MachineInstr::const_mop_iterator MOE,
+                            LocationVec &Locations, LiveOutVec &LiveOuts);
+
   /// Create a live-out register record for the given register @p Reg.
   LiveOutReg createLiveOutReg(unsigned Reg,
                               const TargetRegisterInfo *TRI) const;

diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 6d31097778ca9..34a494cef9c5f 100644
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -98,48 +98,6 @@ static unsigned getRegisterSize(const TargetRegisterInfo &TRI, Register Reg) {
   return TRI.getSpillSize(*RC);
 }

-// Advance iterator to the next stack map entry
-static MachineInstr::const_mop_iterator
-advanceToNextStackMapElt(MachineInstr::const_mop_iterator MOI) {
-  if (MOI->isImm()) {
-    switch (MOI->getImm()) {
-    default:
-      llvm_unreachable("Unrecognized operand type.");
-    case StackMaps::DirectMemRefOp:
-      MOI += 2; // <reg>, <offset>
-      break;
-    case StackMaps::IndirectMemRefOp:
-      MOI += 3; // <size>, <reg>, <offset>
-      break;
-    case StackMaps::ConstantOp:
-      MOI += 1;
-      break;
-    }
-  }
-  return ++MOI;
-}
-
-// Return statepoint GC args as a set
-static SmallSet<Register, 8> collectGCRegs(MachineInstr &MI) {
-  StatepointOpers SO(&MI);
-  unsigned NumDeoptIdx = SO.getNumDeoptArgsIdx();
-  unsigned NumDeoptArgs = MI.getOperand(NumDeoptIdx).getImm();
-  MachineInstr::const_mop_iterator MOI(MI.operands_begin() + NumDeoptIdx + 1),
-      MOE(MI.operands_end());
-
-  // Skip deopt args
-  while (NumDeoptArgs--)
-    MOI = advanceToNextStackMapElt(MOI);
-
-  SmallSet<Register, 8> Result;
-  while (MOI != MOE) {
-    if (MOI->isReg() && !MOI->isImplicit())
-      Result.insert(MOI->getReg());
-    MOI = advanceToNextStackMapElt(MOI);
-  }
-  return Result;
-}
-
 // Try to eliminate redundant copy to register which we're going to
 // spill, i.e. try to change:
 // X = COPY Y
@@ -411,8 +369,13 @@ class StatepointState {
   // Also cache the size of found registers.
   // Returns true if caller save registers found.
   bool findRegistersToSpill() {
+    SmallSet<Register, 8> GCRegs;
+    // All GC pointer operands assigned to registers produce new value.
+    // Since they're tied to their defs, it is enough to collect def registers.
+    for (const auto &Def : MI.defs())
+      GCRegs.insert(Def.getReg());
+
     SmallSet<Register, 8> VisitedRegs;
-    SmallSet<Register, 8> GCRegs = collectGCRegs(MI);
     for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
                   EndIdx = MI.getNumOperands();
          Idx < EndIdx; ++Idx) {

diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index e2da367cfe3f6..be6f2254a8588 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -82,19 +82,6 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
   return N;
 }

-/// Return starting index of GC operand list.
-// FIXME: need a better place for this. Put it in StackMaps?
-static unsigned getStatepointGCArgStartIdx(MachineInstr *MI) {
-  assert(MI->getOpcode() == TargetOpcode::STATEPOINT &&
-         "STATEPOINT node expected");
-  unsigned OperIdx = StatepointOpers(MI).getNumDeoptArgsIdx();
-  unsigned NumDeopts = MI->getOperand(OperIdx).getImm();
-  ++OperIdx;
-  while (NumDeopts--)
-    OperIdx = StackMaps::getNextMetaArgIdx(MI, OperIdx);
-  return OperIdx;
-}
-
 /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
 /// implicit physical register output.
 void InstrEmitter::
@@ -993,14 +980,13 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
     assert(!HasPhysRegOuts && "STATEPOINT mishandled");
     MachineInstr *MI = MIB;
     unsigned Def = 0;
-    unsigned Use = getStatepointGCArgStartIdx(MI);
-    Use = StackMaps::getNextMetaArgIdx(MI, Use); // first derived
-    assert(Use < MI->getNumOperands());
+    int First = StatepointOpers(MI).getFirstGCPtrIdx();
+    assert(First > 0 && "Statepoint has Defs but no GC ptr list");
+    unsigned Use = (unsigned)First;
     while (Def < NumDefs) {
       if (MI->getOperand(Use).isReg())
         MI->tieOperands(Def++, Use);
-      Use = StackMaps::getNextMetaArgIdx(MI, Use); // next base
-      Use = StackMaps::getNextMetaArgIdx(MI, Use); // next derived
+      Use = StackMaps::getNextMetaArgIdx(MI, Use);
     }
   }

diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 9cb7f45db096a..1c129f3ef7262 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -495,6 +495,7 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
 static void
 lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
                         SmallVectorImpl<MachineMemOperand *> &MemRefs,
+                        SmallVectorImpl<SDValue> &GCPtrs,
                         DenseMap<SDValue, int> &LowerAsVReg,
                         SelectionDAGBuilder::StatepointLoweringInfo &SI,
                         SelectionDAGBuilder &Builder) {
@@ -547,21 +548,39 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
   unsigned MaxVRegPtrs =
       std::min(MaxTiedRegs, MaxRegistersForGCPointers.getValue());

-  LLVM_DEBUG(dbgs() << "Desiding how to lower GC Pointers:\n");
+  LLVM_DEBUG(dbgs() << "Deciding how to lower GC Pointers:\n");
+
+  // List of unique lowered GC Pointer values.
+  SmallSetVector<SDValue, 16> LoweredGCPtrs;
+  // Map lowered GC Pointer value to the index in the above vector.
+  DenseMap<SDValue, unsigned> GCPtrIndexMap;
+
   unsigned CurNumVRegs = 0;
-  for (const Value *P : SI.Ptrs) {
+
+  auto processGCPtr = [&](const Value *V) {
+    SDValue PtrSD = Builder.getValue(V);
+    if (!LoweredGCPtrs.insert(PtrSD))
+      return; // skip duplicates
+    GCPtrIndexMap[PtrSD] = LoweredGCPtrs.size() - 1;
+
+    assert(!LowerAsVReg.count(PtrSD) && "must not have been seen");
     if (LowerAsVReg.size() == MaxVRegPtrs)
-      break;
-    SDValue PtrSD = Builder.getValue(P);
-    if (willLowerDirectly(PtrSD) || P->getType()->isVectorTy()) {
+      return;
+    if (willLowerDirectly(PtrSD) || V->getType()->isVectorTy()) {
       LLVM_DEBUG(dbgs() << "direct/spill "; PtrSD.dump(&Builder.DAG));
-      continue;
+      return;
     }
     LLVM_DEBUG(dbgs() << "vreg "; PtrSD.dump(&Builder.DAG));
     LowerAsVReg[PtrSD] = CurNumVRegs++;
-  }
-  LLVM_DEBUG(dbgs() << LowerAsVReg.size()
-                    << " derived pointers will go in vregs\n");
+  };
+
+  // Process derived pointers first to give them a better chance of being
+  // assigned a VReg.
+  for (const Value *V : SI.Ptrs)
+    processGCPtr(V);
+  for (const Value *V : SI.Bases)
+    processGCPtr(V);
+
+  LLVM_DEBUG(dbgs() << LowerAsVReg.size() << " pointers will go in vregs\n");

   auto isGCValue = [&](const Value *V) {
     auto *Ty = V->getType();
@@ -589,13 +608,16 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
       reservePreviousStackSlotForValue(V, Builder);
   }

-  for (unsigned i = 0; i < SI.Bases.size(); ++i) {
-    SDValue SDV = Builder.getValue(SI.Bases[i]);
-    if (AlwaysSpillBase || !LowerAsVReg.count(SDV))
-      reservePreviousStackSlotForValue(SI.Bases[i], Builder);
-    SDV = Builder.getValue(SI.Ptrs[i]);
+  for (const Value *V : SI.Ptrs) {
+    SDValue SDV = Builder.getValue(V);
+    if (!LowerAsVReg.count(SDV))
+      reservePreviousStackSlotForValue(V, Builder);
+  }
+
+  for (const Value *V : SI.Bases) {
+    SDValue SDV = Builder.getValue(V);
     if (!LowerAsVReg.count(SDV))
-      reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
+      reservePreviousStackSlotForValue(V, Builder);
   }

   // First, prefix the list with the number of unique values to be
@@ -624,43 +646,51 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
                                  Builder);
   }

-  // Finally, go ahead and lower all the gc arguments. There's no prefixed
-  // length for this one. After lowering, we'll have the base and pointer
-  // arrays interwoven with each (lowered) base pointer immediately followed by
-  // it's (lowered) derived pointer. i.e
-  // (base[0], ptr[0], base[1], ptr[1], ...)
-  for (unsigned i = 0; i < SI.Bases.size(); ++i) {
-    bool RequireSpillSlot;
-    SDValue Base = Builder.getValue(SI.Bases[i]);
-    RequireSpillSlot = AlwaysSpillBase || !LowerAsVReg.count(Base);
-    lowerIncomingStatepointValue(Base, RequireSpillSlot, Ops, MemRefs,
+  // Finally, go ahead and lower all the gc arguments.
+  pushStackMapConstant(Ops, Builder, LoweredGCPtrs.size());
+  for (SDValue SDV : LoweredGCPtrs)
+    lowerIncomingStatepointValue(SDV, !LowerAsVReg.count(SDV), Ops, MemRefs,
                                  Builder);
-    SDValue Derived = Builder.getValue(SI.Ptrs[i]);
-    RequireSpillSlot = !LowerAsVReg.count(Derived);
-    lowerIncomingStatepointValue(Derived, RequireSpillSlot, Ops, MemRefs,
-                                 Builder);
-  }
+  // Copy to out vector. LoweredGCPtrs will be empty after this point.
+  GCPtrs = LoweredGCPtrs.takeVector();

   // If there are any explicit spill slots passed to the statepoint, record
   // them, but otherwise do not do anything special. These are user provided
   // allocas and give control over placement to the consumer. In this case,
  // it is the contents of the slot which may get updated, not the pointer to
   // the alloca
+  SmallVector<SDValue, 4> Allocas;
   for (Value *V : SI.GCArgs) {
     SDValue Incoming = Builder.getValue(V);
     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
       // This handles allocas as arguments to the statepoint
       assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
              "Incoming value is a frame index!");
-      Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
-                                                    Builder.getFrameIndexTy()));
+      Allocas.push_back(Builder.DAG.getTargetFrameIndex(
+          FI->getIndex(), Builder.getFrameIndexTy()));

       auto &MF = Builder.DAG.getMachineFunction();
       auto *MMO = getMachineMemOperand(MF, *FI);
       MemRefs.push_back(MMO);
     }
   }
+  pushStackMapConstant(Ops, Builder, Allocas.size());
+  Ops.append(Allocas.begin(), Allocas.end());
+
+  // Now construct the GC base/derived map.
+  pushStackMapConstant(Ops, Builder, SI.Ptrs.size());
+  SDLoc L = Builder.getCurSDLoc();
+  for (unsigned i = 0; i < SI.Ptrs.size(); ++i) {
+    SDValue Base = Builder.getValue(SI.Bases[i]);
+    assert(GCPtrIndexMap.count(Base) && "base not found in index map");
+    Ops.push_back(
+        Builder.DAG.getTargetConstant(GCPtrIndexMap[Base], L, MVT::i64));
+    SDValue Derived = Builder.getValue(SI.Ptrs[i]);
+    assert(GCPtrIndexMap.count(Derived) && "derived not found in index map");
+    Ops.push_back(
+        Builder.DAG.getTargetConstant(GCPtrIndexMap[Derived], L, MVT::i64));
+  }
 }

 SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
@@ -683,11 +713,16 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
 #endif

   // Lower statepoint vmstate and gcstate arguments
+
+  // All lowered meta args.
   SmallVector<SDValue, 10> LoweredMetaArgs;
+  // Lowered GC pointers (subset of above).
+  SmallVector<SDValue, 16> LoweredGCArgs;
   SmallVector<MachineMemOperand *, 16> MemRefs;
   // Maps derived pointer SDValue to statepoint result of relocated pointer.
   DenseMap<SDValue, int> LowerAsVReg;
-  lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, LowerAsVReg, SI, *this);
+  lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, LoweredGCArgs, LowerAsVReg,
+                          SI, *this);

   // Now that we've emitted the spills, we need to update the root so that the
   // call sequence is ordered correctly.
@@ -802,8 +837,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
   // Compute return values. Provide a glue output since we consume one as
   // input. This allows someone else to chain off us as needed.
  SmallVector<EVT, 8> NodeTys;
-  for (auto &Ptr : SI.Ptrs) {
-    SDValue SD = getValue(Ptr);
+  for (auto SD : LoweredGCArgs) {
     if (!LowerAsVReg.count(SD))
       continue;
     NodeTys.push_back(SD.getValueType());

diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp
index 806ba1aa98226..bdcadab0df08e 100644
--- a/llvm/lib/CodeGen/StackMaps.cpp
+++ b/llvm/lib/CodeGen/StackMaps.cpp
@@ -45,6 +45,14 @@ static cl::opt<int> StackMapVersion(

 const char *StackMaps::WSMP = "Stack Maps: ";

+static uint64_t getConstMetaVal(const MachineInstr &MI, unsigned Idx) {
+  assert(MI.getOperand(Idx).isImm() &&
+         MI.getOperand(Idx).getImm() == StackMaps::ConstantOp);
+  const auto &MO = MI.getOperand(Idx + 1);
+  assert(MO.isImm());
+  return MO.getImm();
+}
+
 StackMapOpers::StackMapOpers(const MachineInstr *MI) : MI(MI) {
   assert(getVarIdx() <= MI->getNumOperands() &&
@@ -83,12 +91,56 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
   return ScratchIdx;
 }

+int StatepointOpers::getFirstGCPtrIdx() {
+  unsigned NumDeoptsIdx = getNumDeoptArgsIdx();
+  unsigned NumDeoptArgs = MI->getOperand(NumDeoptsIdx).getImm();
+
+  unsigned CurIdx = NumDeoptsIdx + 1;
+  while (NumDeoptArgs--) {
+    CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+  }
+  ++CurIdx; // <StackMaps::ConstantOp>
+  unsigned NumGCPtrs = MI->getOperand(CurIdx).getImm();
+  if (NumGCPtrs == 0)
+    return -1;
+  ++CurIdx; // <num gc ptrs>
+  assert(CurIdx < MI->getNumOperands() && "Index points past operand list");
+  return (int)CurIdx;
+}
+
+unsigned StatepointOpers::getGCPointerMap(
+    SmallVectorImpl<std::pair<unsigned, unsigned>> &GCMap) {
+  int FirstGCIdx = getFirstGCPtrIdx();
+  if (FirstGCIdx == -1)
+    return 0;
+  unsigned NumGCPtr = getConstMetaVal(*MI, (unsigned)FirstGCIdx - 2);
+  unsigned CurIdx = (unsigned)FirstGCIdx;
+  while (NumGCPtr--)
+    CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+
+  unsigned NumAllocas = getConstMetaVal(*MI, CurIdx);
+  CurIdx += 2;
+  while (NumAllocas--)
+    CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+
+  assert(CurIdx < MI->getNumOperands());
+  unsigned GCMapSize = getConstMetaVal(*MI, CurIdx);
+  CurIdx += 2;
+  for (unsigned N = 0; N < GCMapSize; ++N) {
+    unsigned B = MI->getOperand(CurIdx++).getImm();
+    unsigned D = MI->getOperand(CurIdx++).getImm();
+    GCMap.push_back(std::make_pair(B, D));
+  }
+
+  return GCMapSize;
+}
+
 StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
   if (StackMapVersion != 3)
     llvm_unreachable("Unsupported stackmap version!");
 }

-unsigned StackMaps::getNextMetaArgIdx(MachineInstr *MI, unsigned CurIdx) {
+unsigned StackMaps::getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx) {
   assert(CurIdx < MI->getNumOperands() && "Bad meta arg index");
   const auto &MO = MI->getOperand(CurIdx);
   if (MO.isImm()) {
@@ -317,6 +369,76 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
   return LiveOuts;
 }

+// See statepoint MI format description in StatepointOpers' class comment
+// in include/llvm/CodeGen/StackMaps.h
+void StackMaps::parseStatepointOpers(const MachineInstr &MI,
+                                     MachineInstr::const_mop_iterator MOI,
+                                     MachineInstr::const_mop_iterator MOE,
+                                     LocationVec &Locations,
+                                     LiveOutVec &LiveOuts) {
+  LLVM_DEBUG(dbgs() << "record statepoint : " << MI << "\n");
+  StatepointOpers SO(&MI);
+  MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // CC
+  MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Flags
+  MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Num Deopts
+
+  // Record Deopt Args.
+  unsigned NumDeoptArgs = Locations.back().Offset;
+  assert(Locations.back().Type == Location::Constant);
+  assert(NumDeoptArgs == SO.getNumDeoptArgs());
+
+  while (NumDeoptArgs--)
+    MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
+
+  // Record gc base/derived pairs
+  assert(MOI->isImm() && MOI->getImm() == StackMaps::ConstantOp);
+  ++MOI;
+  assert(MOI->isImm());
+  unsigned NumGCPointers = MOI->getImm();
+  ++MOI;
+  if (NumGCPointers) {
+    // Map logical index of GC ptr to MI operand index.
+    SmallVector<unsigned, 8> GCPtrIndices;
+    unsigned GCPtrIdx = (unsigned)SO.getFirstGCPtrIdx();
+    assert((int)GCPtrIdx != -1);
+    assert(MOI - MI.operands_begin() == GCPtrIdx);
+    while (NumGCPointers--) {
+      GCPtrIndices.push_back(GCPtrIdx);
+      GCPtrIdx = StackMaps::getNextMetaArgIdx(&MI, GCPtrIdx);
+    }
+
+    SmallVector<std::pair<unsigned, unsigned>, 8> GCPairs;
+    unsigned NumGCPairs = SO.getGCPointerMap(GCPairs);
+    LLVM_DEBUG(dbgs() << "NumGCPairs = " << NumGCPairs << "\n");
+
+    auto MOB = MI.operands_begin();
+    for (auto &P : GCPairs) {
+      assert(P.first < GCPtrIndices.size() && "base pointer index not found");
+      assert(P.second < GCPtrIndices.size() &&
+             "derived pointer index not found");
+      unsigned BaseIdx = GCPtrIndices[P.first];
+      unsigned DerivedIdx = GCPtrIndices[P.second];
+      LLVM_DEBUG(dbgs() << "Base : " << BaseIdx << " Derived : " << DerivedIdx
+                        << "\n");
+      (void)parseOperand(MOB + BaseIdx, MOE, Locations, LiveOuts);
+      (void)parseOperand(MOB + DerivedIdx, MOE, Locations, LiveOuts);
+    }
+
+    MOI = MOB + GCPtrIdx;
+  }
+
+  // Record gc allocas
+  assert(MOI < MOE);
+  assert(MOI->isImm() && MOI->getImm() == StackMaps::ConstantOp);
+  ++MOI;
+  unsigned NumAllocas = MOI->getImm();
+  ++MOI;
+  while (NumAllocas--) {
+    MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
+    assert(MOI < MOE);
+  }
+}
+
 void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
                                     const MachineInstr &MI, uint64_t ID,
                                     MachineInstr::const_mop_iterator MOI,
@@ -334,9 +456,11 @@ void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
   }

   // Parse operands.
-  while (MOI != MOE) {
-    MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
-  }
+  if (MI.getOpcode() == TargetOpcode::STATEPOINT)
+    parseStatepointOpers(MI, MOI, MOE, Locations, LiveOuts);
+  else
+    while (MOI != MOE)
+      MOI = parseOperand(MOI, MOE, Locations, LiveOuts);

   // Move large constants into the constant pool.
for (auto &Loc : Locations) { @@ -417,8 +541,6 @@ void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint"); StatepointOpers opers(&MI); - // Record all the deopt and gc operands (they're contiguous and run from the - // initial index to the end of the operand list) const unsigned StartIdx = opers.getVarIdx(); recordStackMapOpers(L, MI, opers.getID(), MI.operands_begin() + StartIdx, MI.operands_end(), false); diff --git a/llvm/test/CodeGen/X86/statepoint-stack-usage.ll b/llvm/test/CodeGen/X86/statepoint-stack-usage.ll index 4e355b2372ec9..d86af6ef1f227 100644 --- a/llvm/test/CodeGen/X86/statepoint-stack-usage.ll +++ b/llvm/test/CodeGen/X86/statepoint-stack-usage.ll @@ -11,9 +11,9 @@ target triple = "x86_64-pc-linux-gnu" define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" { ; CHECK-LABEL: back_to_back_calls ; The exact stores don't matter, but there need to be three stack slots created -; CHECK-DAG: movq %rdi, 16(%rsp) -; CHECK-DAG: movq %rdx, 8(%rsp) -; CHECK-DAG: movq %rsi, (%rsp) +; CHECK-DAG: movq %rdi, {{[0-9]*}}(%rsp) +; CHECK-DAG: movq %rdx, {{[0-9]*}}(%rsp) +; CHECK-DAG: movq %rsi, {{[0-9]*}}(%rsp) ; There should be no more than three moves ; CHECK-NOT: movq %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)] @@ -36,9 +36,9 @@ define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 a define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" { ; CHECK-LABEL: reserve_first ; The exact stores don't matter, but there need to be three stack slots created -; CHECK-DAG: movq %rdi, 16(%rsp) -; CHECK-DAG: movq %rdx, 8(%rsp) -; CHECK-DAG: movq %rsi, (%rsp) +; CHECK-DAG: movq %rdi, {{[0-9]*}}(%rsp) +; CHECK-DAG: movq %rdx, {{[0-9]*}}(%rsp) +; CHECK-DAG: movq %rsi, {{[0-9]*}}(%rsp) %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)] %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 0) %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 1) @@ -89,9 +89,9 @@ define i32 @back_to_back_invokes(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 ; CHECK-LABEL: back_to_back_invokes entry: ; The exact stores don't matter, but there need to be three stack slots created - ; CHECK-DAG: movq %rdi, 16(%rsp) - ; CHECK-DAG: movq %rdx, 8(%rsp) - ; CHECK-DAG: movq %rsi, (%rsp) + ; CHECK-DAG: movq %rdi, {{[0-9]*}}(%rsp) + ; CHECK-DAG: movq %rdx, {{[0-9]*}}(%rsp) + ; CHECK-DAG: movq %rsi, {{[0-9]*}}(%rsp) ; CHECK: callq %safepoint_token = invoke token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)] to label %normal_return unwind label %exceptional_return diff --git a/llvm/test/CodeGen/X86/statepoint-vector.ll b/llvm/test/CodeGen/X86/statepoint-vector.ll index a7d7be8ed0699..3131423696b29 100644 --- a/llvm/test/CodeGen/X86/statepoint-vector.ll +++ b/llvm/test/CodeGen/X86/statepoint-vector.ll @@ -32,11 +32,11 @@ define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) g ; CHECK-NEXT: movq %rdi, %xmm1 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; CHECK-NEXT: paddq %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movdqa %xmm1, (%rsp) +; CHECK-NEXT: movdqa %xmm0, {{[0-9]*}}(%rsp) +; CHECK-NEXT: movdqa %xmm1, {{[0-9]*}}(%rsp) ; CHECK-NEXT: callq do_safepoint ; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: movaps (%rsp), %xmm0 +; CHECK-NEXT: movaps {{[0-9]*}}(%rsp), %xmm0 ; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -163,13 +163,13 @@ entry: ; CHECK: .short 16 ; CHECK: .short 7 ; CHECK: .short 0 -; CHECK: .long 16 +; CHECK: .long 0 ; CHECK: .byte 3 ; CHECK: .byte 0 ; CHECK: .short 16 ; CHECK: .short 7 ; CHECK: .short 0 -; CHECK: .long 0 +; CHECK: .long 16 ; CHECK: .Ltmp2-test3 ; Check for the four spill slots diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll index eb260ab5aaf6e..814ba3d03feeb 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll @@ -24,16 +24,14 @@ declare void @bar(i8 addrspace(1)*, i8 addrspace(1)*) define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" { ; CHECK-VREG-LABEL: name: test_relocate ; CHECK-VREG: %0:gr64 = COPY $rdi -; CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) -; CHECK-VREG: %1:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, %0(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store 8 on %stack.0) +; CHECK-VREG: %1:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, %0(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al ; CHECK-VREG: %2:gr8 = COPY $al ; CHECK-VREG: $rdi = COPY %1 ; CHECK-VREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp ; CHECK-PREG-LABEL: name: test_relocate ; CHECK-PREG: renamable $rbx = COPY $rdi -; CHECK-PREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rbx :: (store 8 into %stack.0) -; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, killed renamable $rbx(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store 8 on %stack.0) +; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, killed renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al ; CHECK-PREG: renamable $bpl = COPY killed $al ; CHECK-PREG: $rdi = COPY killed renamable $rbx ; CHECK-PREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp @@ -51,10 +49,7 @@ define void @test_mixed(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspac ; CHECK-VREG: %2:gr64 
= COPY $rdx ; CHECK-VREG: %1:gr64 = COPY $rsi ; CHECK-VREG: %0:gr64 = COPY $rdi -; CHECK-VREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, %1 :: (store 8 into %stack.1) -; CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %2 :: (store 8 into %stack.0) -; CHECK-VREG: MOV64mr %stack.2, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.2) -; CHECK-VREG: %3:gr64, %4:gr64, %5:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, %2(tied-def 0), 2, 0, 2, 0, 1, 8, %stack.1, 0, %1(tied-def 1), 1, 8, %stack.2, 0, %0(tied-def 2), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1), (volatile load store 8 on %stack.2) +; CHECK-VREG: %3:gr64, %4:gr64, %5:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 4, %2(tied-def 0), 2, 0, %1(tied-def 1), %0(tied-def 2), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK-VREG: %6:gr32 = MOV32r0 implicit-def dead $eflags ; CHECK-VREG: %7:gr64 = SUBREG_TO_REG 0, killed %6, %subreg.sub_32bit ; CHECK-VREG: $rdi = COPY %5 @@ -68,10 +63,7 @@ define void @test_mixed(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspac ; CHECK-PREG: renamable $r14 = COPY $rdx ; CHECK-PREG: renamable $r15 = COPY $rsi ; CHECK-PREG: renamable $rbx = COPY $rdi -; CHECK-PREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, renamable $r15 :: (store 8 into %stack.1) -; CHECK-PREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $r14 :: (store 8 into %stack.0) -; CHECK-PREG: MOV64mr %stack.2, 1, $noreg, 0, $noreg, renamable $rbx :: (store 8 into %stack.2) -; CHECK-PREG: renamable $r14, renamable $r15, renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, killed renamable $r14(tied-def 0), 2, 0, 2, 0, 1, 8, %stack.1, 0, killed renamable $r15(tied-def 1), 1, 8, %stack.2, 0, killed renamable $rbx(tied-def 2), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1), (volatile load store 8 on %stack.2) +; CHECK-PREG: renamable $r14, renamable $r15, renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 4, killed renamable $r14(tied-def 0), 2, 0, killed renamable $r15(tied-def 1), killed renamable $rbx(tied-def 2), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK-PREG: $rdi = COPY killed renamable $rbx ; CHECK-PREG: dead $esi = MOV32r0 implicit-def dead $eflags, implicit-def $rsi ; CHECK-PREG: $rdx = COPY killed renamable $r15 @@ -95,8 +87,7 @@ define i32 addrspace(1)* @test_alloca(i32 addrspace(1)* %ptr) gc "statepoint-exa ; CHECK-VREG-LABEL: name: test_alloca ; CHECK-VREG: %0:gr64 = COPY $rdi ; CHECK-VREG: MOV64mr %stack.0.alloca, 1, $noreg, 0, $noreg, %0 :: (store 8 into %ir.alloca) -; CHECK-VREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.1) -; CHECK-VREG: %1:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 1, 8, %stack.1, 0, %0(tied-def 0), 0, %stack.0.alloca, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store 8 on %stack.1), (volatile load store 8 on %stack.0.alloca) +; CHECK-VREG: %1:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, %0(tied-def 0), 2, 1, 0, %stack.0.alloca, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store 8 on %stack.0.alloca) ; CHECK-VREG: %2:gr8 = COPY $al ; CHECK-VREG: %3:gr64 = MOV64rm %stack.0.alloca, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.alloca) ; 
CHECK-VREG: $rdi = COPY %1 @@ -105,8 +96,7 @@ define i32 addrspace(1)* @test_alloca(i32 addrspace(1)* %ptr) gc "statepoint-exa ; CHECK-PREG-LABEL: name: test_alloca ; CHECK-PREG: renamable $rbx = COPY $rdi ; CHECK-PREG: MOV64mr %stack.0.alloca, 1, $noreg, 0, $noreg, renamable $rbx :: (store 8 into %ir.alloca) -; CHECK-PREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, renamable $rbx :: (store 8 into %stack.1) -; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 1, 8, %stack.1, 0, killed renamable $rbx(tied-def 0), 0, %stack.0.alloca, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $al :: (volatile load store 8 on %stack.1), (volatile load store 8 on %stack.0.alloca) +; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, killed renamable $rbx(tied-def 0), 2, 1, 0, %stack.0.alloca, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $al :: (volatile load store 8 on %stack.0.alloca) ; CHECK-PREG: renamable $r14 = MOV64rm %stack.0.alloca, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.alloca) ; CHECK-PREG: $rdi = COPY killed renamable $rbx ; CHECK-PREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp @@ -126,15 +116,13 @@ define void @test_base_derived(i32 addrspace(1)* %base, i32 addrspace(1)* %deriv ; CHECK-VREG-LABEL: name: test_base_derived ; CHECK-VREG: %1:gr64 = COPY $rsi ; CHECK-VREG: %0:gr64 = COPY $rdi -; CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) -; CHECK-VREG: %2:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, %1(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK-VREG: %2:gr64, %3:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 2, %1(tied-def 0), %0(tied-def 1), 2, 0, 2, 1, 1, 0, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK-VREG: $rdi = COPY %2 ; CHECK-VREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp ; CHECK-PREG-LABEL: name: test_base_derived ; CHECK-PREG: renamable $rbx = COPY $rsi -; CHECK-PREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rdi :: (store 8 into %stack.0) -; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, killed renamable $rbx(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK-PREG: renamable $rbx, dead renamable $r14 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $rbx(tied-def 0), killed renamable $r14(tied-def 1), 2, 0, 2, 1, 1, 0, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK-PREG: $rdi = COPY killed renamable $rbx ; CHECK-PREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp @@ -149,9 +137,8 @@ define void @test_deopt_gcpointer(i32 addrspace(1)* %a, i32 addrspace(1)* %b) gc ; CHECK-VREG-LABEL: name: test_deopt_gcpointer ; CHECK-VREG: %1:gr64 = COPY $rsi ; CHECK-VREG: %0:gr64 = COPY $rdi -; CHECK-VREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, %1 :: (store 8 into %stack.1) ; CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) -; CHECK-VREG: %2:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, %1(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0), (volatile load store 8 
on %stack.1) +; CHECK-VREG: %2:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 1, %1(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) ; CHECK-VREG: $rdi = COPY %2 ; CHECK-VREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp ; CHECK-VREG: RET 0 @@ -159,7 +146,7 @@ define void @test_deopt_gcpointer(i32 addrspace(1)* %a, i32 addrspace(1)* %b) gc ; CHECK-PREG-LABEL: name: test_deopt_gcpointer ; CHECK-PREG: renamable $rbx = COPY $rsi ; CHECK-PREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rdi :: (store 8 into %stack.0) -; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, killed renamable $rbx(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1) +; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 1, killed renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) ; CHECK-PREG: $rdi = COPY killed renamable $rbx ; CHECK-PREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp @@ -173,16 +160,14 @@ define void @test_deopt_gcpointer(i32 addrspace(1)* %a, i32 addrspace(1)* %b) gc define void @test_gcrelocate_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example" { ; CHECK-VREG-LABEL: name: test_gcrelocate_uniqueing ; CHECK-VREG: %0:gr64 = COPY $rdi -; CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) -; CHECK-VREG: %1:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, %0, 2, 4278124286, 1, 8, %stack.0, 0, %0(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK-VREG: %1:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, %0, 2, 4278124286, 2, 1, %0(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK-VREG: $rdi = COPY %1 ; CHECK-VREG: $rsi = COPY %1 ; CHECK-VREG: CALL64pcrel32 @consume2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp ; CHECK-PREG-LABEL: name: test_gcrelocate_uniqueing ; CHECK-PREG: renamable $rbx = COPY $rdi -; CHECK-PREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rbx :: (store 8 into %stack.0) -; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, killed renamable $rbx, 2, 4278124286, 1, 8, %stack.0, 0, renamable $rbx(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, killed renamable $rbx, 2, 4278124286, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK-PREG: $rdi = COPY renamable $rbx ; CHECK-PREG: $rsi = COPY killed renamable $rbx ; CHECK-PREG: CALL64pcrel32 @consume2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit killed $rsi, implicit-def $rsp, implicit-def $ssp @@ -198,9 +183,8 @@ define void @test_gcrelocate_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-ex define void @test_gcptr_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example" { ; CHECK-VREG-LABEL: name: test_gcptr_uniqueing ; CHECK-VREG: %0:gr64 = COPY $rdi -; CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 
into %stack.0) ; CHECK-VREG: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp -; CHECK-VREG: %1:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, %0, 2, 4278124286, 1, 8, %stack.0, 0, %0(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK-VREG: %1:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, %0, 2, 4278124286, 2, 1, %0(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK-VREG: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-VREG: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-VREG: $rdi = COPY %1 @@ -209,8 +193,7 @@ define void @test_gcptr_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example ; CHECK-PREG-LABEL: name: test_gcptr_uniqueing ; CHECK-PREG: renamable $rbx = COPY $rdi -; CHECK-PREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rbx :: (store 8 into %stack.0) -; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, killed renamable $rbx, 2, 4278124286, 1, 8, %stack.0, 0, renamable $rbx(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, killed renamable $rbx, 2, 4278124286, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK-PREG: $rdi = COPY renamable $rbx ; CHECK-PREG: $rsi = COPY killed renamable $rbx ; CHECK-PREG: CALL64pcrel32 @use1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit killed $rsi, implicit-def $rsp, implicit-def $ssp @@ -230,9 +213,8 @@ define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint ; CHECK-VREG: %1:gr32 = COPY $esi ; CHECK-VREG-NEXT: %0:gr64 = COPY $rdi ; CHECK-VREG-NEXT: %4:gr8 = COPY %1.sub_8bit -; CHECK-VREG-NEXT: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) ; CHECK-VREG-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp -; CHECK-VREG-NEXT: %2:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, %0(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store 8 on %stack.0) +; CHECK-VREG-NEXT: %2:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, %0(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al ; CHECK-VREG-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-VREG-NEXT: %5:gr8 = COPY $al ; CHECK-VREG-NEXT: %3:gr8 = COPY %5 @@ -269,8 +251,8 @@ right: define i1 @duplicate_reloc() gc "statepoint-example" { ; CHECK-VREG-LABEL: name: duplicate_reloc ; CHECK-VREG: bb.0.entry: -; CHECK-VREG: STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp -; CHECK-VREG: STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK-VREG: STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK-VREG: STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, 
implicit-def $ssp ; CHECK-VREG: %0:gr8 = MOV8ri 1 ; CHECK-VREG: $al = COPY %0 ; CHECK-VREG: RET 0, $al @@ -294,7 +276,7 @@ define <2 x i8 addrspace(1)*> @test_vector(<2 x i8 addrspace(1)*> %obj) gc "stat ; CHECK-VREG-LABEL: name: test_vector ; CHECK-VREG: %0:vr128 = COPY $xmm0 ; CHECK-VREG: MOVAPSmr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 16 into %stack.0) -; CHECK-VREG: STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 1, 16, %stack.0, 0, 1, 16, %stack.0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 16 on %stack.0) +; CHECK-VREG: STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 1, 1, 16, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 16 on %stack.0) ; CHECK-VREG: %1:vr128 = MOVAPSrm %stack.0, 1, $noreg, 0, $noreg :: (load 16 from %stack.0) ; CHECK-VREG: $xmm0 = COPY %1 ; CHECK-VREG: RET 0, $xmm0 @@ -314,13 +296,9 @@ define void @test_limit(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspac ; CHECK-VREG: %2:gr64 = COPY $rdx ; CHECK-VREG: %1:gr64 = COPY $rsi ; CHECK-VREG: %0:gr64 = COPY $rdi -; CHECK-VREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, %3 :: (store 8 into %stack.1) -; CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %4 :: (store 8 into %stack.0) -; CHECK-VREG: MOV64mr %stack.2, 1, $noreg, 0, $noreg, %2 :: (store 8 into %stack.2) -; CHECK-VREG: MOV64mr %stack.3, 1, $noreg, 0, $noreg, %1 :: (store 8 into %stack.3) -; CHECK-VREG: MOV64mr %stack.4, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.4) -; CHECK-VREG: %5:gr64, %6:gr64, %7:gr64, %8:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, %4(tied-def 0), 1, 8, %stack.1, 0, %3(tied-def 1), 1, 8, %stack.2, 0, %2(tied-def 2), 1, 8, %stack.3, 0, %1(tied-def 3), 1, 8, %stack.4, 0, 1, 8, %stack.4, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1), (volatile load store 8 on %stack.2), (volatile load store 8 on %stack.3), (volatile load store 8 on %stack.4) -; CHECK-VREG: %9:gr64 = MOV64rm %stack.4, 1, $noreg, 0, $noreg :: (load 8 from %stack.4) +; CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) +; CHECK-VREG: %5:gr64, %6:gr64, %7:gr64, %8:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 5, %4(tied-def 0), %3(tied-def 1), %2(tied-def 2), %1(tied-def 3), 1, 8, %stack.0, 0, 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK-VREG: %9:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) ; CHECK-VREG: $rdi = COPY %9 ; CHECK-VREG: $rsi = COPY %8 ; CHECK-VREG: $rdx = COPY %7 @@ -345,19 +323,19 @@ entry: define void @test_duplicate_ir_values() gc "statepoint-example" personality i32* ()* @fake_personality_function{ ;CHECK-VREG-LABEL: name: test_duplicate_ir_values ;CHECK-VREG: bb.0.entry: -;CHECK-VREG: %0:gr64 = STATEPOINT 1, 16, 5, %8, $edi, $rsi, $edx, $ecx, $r8d, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, %1(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $eax :: (volatile load store 8 on %stack.0) +;CHECK-VREG: %0:gr64 = STATEPOINT 1, 16, 5, %8, $edi, $rsi, $edx, $ecx, $r8d, 2, 0, 2, 0, 2, 0, 2, 1, killed %1(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $eax ;CHECK-VREG: JMP_1 %bb.1 ;CHECK-VREG: bb.1.normal_continue: ;CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) ;CHECK-VREG: %13:gr32 = MOV32ri 10 ;CHECK-VREG: $edi = COPY %13 
-;CHECK-VREG: STATEPOINT 2882400000, 0, 1, @__llvm_deoptimize, $edi, 2, 0, 2, 2, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +;CHECK-VREG: STATEPOINT 2882400000, 0, 1, @__llvm_deoptimize, $edi, 2, 0, 2, 2, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) ;CHECK-VREG: bb.2.exceptional_return (landing-pad): ;CHECK-VREG: EH_LABEL ;CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) ;CHECK-VREG: %12:gr32 = MOV32ri -271 ;CHECK-VREG: $edi = COPY %12 -;CHECK-VREG: STATEPOINT 2882400000, 0, 1, @__llvm_deoptimize, $edi, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +;CHECK-VREG: STATEPOINT 2882400000, 0, 1, @__llvm_deoptimize, $edi, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) entry: %local.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 @@ -388,13 +366,11 @@ define i8 addrspace(1)* @test_isel_sched(i8 addrspace(1)* %0, i8 addrspace(1)* % ;CHECK-VREG: %0:gr64 = COPY $rdi ;CHECK-VREG: TEST32rr %2, %2, implicit-def $eflags ;CHECK-VREG: %5:gr64 = CMOV64rr %1, %0, 4, implicit $eflags -;CHECK-VREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.1) -;CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %1 :: (store 8 into %stack.0) ;CHECK-VREG: %6:gr32 = MOV32r0 implicit-def dead $eflags ;CHECK-VREG: %7:gr64 = SUBREG_TO_REG 0, killed %6, %subreg.sub_32bit ;CHECK-VREG: $rdi = COPY %7 ;CHECK-VREG: $rsi = COPY %5 -;CHECK-VREG: %3:gr64, %4:gr64 = STATEPOINT 10, 0, 2, @bar, $rdi, $rsi, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, %1(tied-def 0), 1, 8, %stack.1, 0, %0(tied-def 1), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1) +;CHECK-VREG: %3:gr64, %4:gr64 = STATEPOINT 10, 0, 2, @bar, $rdi, $rsi, 2, 0, 2, 0, 2, 0, 2, 2, %1(tied-def 0), %0(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp ;CHECK-VREG: TEST32rr %2, %2, implicit-def $eflags ;CHECK-VREG: %8:gr64 = CMOV64rr %3, %4, 4, implicit $eflags ;CHECK-VREG: $rax = COPY %8 diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll index 6a65abed57541..a91b5153cecb8 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll @@ -30,7 +30,6 @@ define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" { ; CHECK-NEXT: .cfi_offset %rbx, -24 ; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: movq %rdi, (%rsp) ; CHECK-NEXT: callq return_i1 ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: movl %eax, %ebp @@ -62,17 +61,12 @@ define void @test_mixed(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspac ; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: subq $32, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset %rbx, -32 ; CHECK-NEXT: .cfi_offset %r14, -24 ; CHECK-NEXT: .cfi_offset %r15, -16 ; CHECK-NEXT: movq %rdx, %r14 ; CHECK-NEXT: movq %rsi, %r15 ; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq func ; CHECK-NEXT: .Ltmp1: ; CHECK-NEXT: 
movq %rbx, %rdi @@ -81,8 +75,6 @@ define void @test_mixed(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspac ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: movq %r14, %r8 ; CHECK-NEXT: callq consume5 -; CHECK-NEXT: addq $32, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: popq %r14 @@ -109,20 +101,19 @@ define i32 addrspace(1)* @test_alloca(i32 addrspace(1)* %ptr) gc "statepoint-exa ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset %rbx, -24 ; CHECK-NEXT: .cfi_offset %r14, -16 ; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rdi, {{[0-9]*}}(%rsp) ; CHECK-NEXT: callq return_i1 ; CHECK-NEXT: .Ltmp2: -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; CHECK-NEXT: movq {{[0-9]*}}(%rsp), %r14 ; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: callq consume ; CHECK-NEXT: movq %r14, %rax -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 @@ -143,20 +134,25 @@ entry: define void @test_base_derived(i32 addrspace(1)* %base, i32 addrspace(1)* %derived) gc "statepoint-example" { ; CHECK-LABEL: test_base_derived: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: .cfi_offset %r14, -16 +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: movq %rdi, %r14 ; CHECK-NEXT: callq func ; CHECK-NEXT: .Ltmp3: ; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: callq consume -; CHECK-NEXT: addq $16, %rsp +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %base, i32 addrspace(1)* %derived)] @@ -175,7 +171,6 @@ define void @test_deopt_gcpointer(i32 addrspace(1)* %a, i32 addrspace(1)* %b) gc ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: movq %rsi, (%rsp) ; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq func ; CHECK-NEXT: .Ltmp4: @@ -198,18 +193,13 @@ define void @test_gcrelocate_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-ex ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: subq $16, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq func ; CHECK-NEXT: .Ltmp5: ; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: movq %rbx, %rsi ; CHECK-NEXT: callq consume2 -; CHECK-NEXT: addq $16, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -226,18 +216,13 @@ define void @test_gcptr_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: subq $16, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq func ; CHECK-NEXT: .Ltmp6: ; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: movq %rbx, %rsi ; CHECK-NEXT: callq use1 -; CHECK-NEXT: addq $16, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -261,14 +246,11 @@ define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint ; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: subq $16, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset %rbx, -32 ; CHECK-NEXT: .cfi_offset %r14, -24 ; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movl %esi, %ebp ; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq return_i1 ; CHECK-NEXT: .Ltmp7: ; CHECK-NEXT: testb $1, %bpl @@ -282,8 +264,6 @@ define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint ; CHECK-NEXT: .LBB7_2: # %right ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: .LBB7_3: # %right -; CHECK-NEXT: addq $16, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: popq %r14 @@ -365,8 +345,8 @@ define void @test_limit(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspac ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 40 -; CHECK-NEXT: subq $40, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset %rbx, -40 ; CHECK-NEXT: .cfi_offset %r12, -32 ; CHECK-NEXT: .cfi_offset %r14, -24 @@ -375,10 +355,6 @@ define void @test_limit(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspac ; CHECK-NEXT: movq %rcx, %r15 ; CHECK-NEXT: movq %rdx, %r12 ; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %r8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rdi, (%rsp) ; CHECK-NEXT: callq func ; 
CHECK-NEXT: .Ltmp11: @@ -388,7 +364,7 @@ define void @test_limit(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspac ; CHECK-NEXT: movq %r15, %rcx ; CHECK-NEXT: movq %r14, %r8 ; CHECK-NEXT: callq consume5 -; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 40 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 32 @@ -417,14 +393,12 @@ define i64 addrspace(1)* @test_basic_invoke(i64 addrspace(1)* %obj, ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset %rbx, -24 ; CHECK-NEXT: .cfi_offset %r14, -16 ; CHECK-NEXT: movq %rsi, %r14 ; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: .Ltmp12: ; CHECK-NEXT: callq some_call ; CHECK-NEXT: .Ltmp15: @@ -432,7 +406,7 @@ define i64 addrspace(1)* @test_basic_invoke(i64 addrspace(1)* %obj, ; CHECK-NEXT: # %bb.1: # %normal_return ; CHECK-NEXT: movq %rbx, %rax ; CHECK-NEXT: .LBB11_2: # %normal_return -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 @@ -440,7 +414,7 @@ define i64 addrspace(1)* @test_basic_invoke(i64 addrspace(1)* %obj, ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB11_3: # %exceptional_return -; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .Ltmp14: ; CHECK-NEXT: movq %r14, %rax ; CHECK-NEXT: jmp .LBB11_2 @@ -476,8 +450,8 @@ define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1 ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 40 -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset %rbx, -40 ; CHECK-NEXT: .cfi_offset %r14, -32 ; CHECK-NEXT: .cfi_offset %r15, -24 @@ -488,8 +462,6 @@ define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1 ; CHECK-NEXT: testb $1, %r14b ; CHECK-NEXT: je .LBB12_2 ; CHECK-NEXT: # %bb.1: # %left -; CHECK-NEXT: movq %rbp, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) ; CHECK-NEXT: .Ltmp19: ; CHECK-NEXT: movq %rbp, %rdi ; CHECK-NEXT: callq some_call @@ -498,8 +470,6 @@ define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1 ; CHECK-NEXT: jmp .LBB12_4 ; CHECK-NEXT: .LBB12_2: # %right ; CHECK-NEXT: movq %rcx, %r15 -; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ; CHECK-NEXT: .Ltmp16: ; CHECK-NEXT: movq %rbp, %rdi ; CHECK-NEXT: callq some_call @@ -513,7 +483,7 @@ define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1 ; CHECK-NEXT: .LBB12_6: # %exceptional_return.left ; CHECK-NEXT: movq %rbp, %rax ; CHECK-NEXT: .LBB12_7: # %exceptional_return.left -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 40 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 32 @@ -525,7 +495,7 @@ define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1 ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB12_8: # %exceptional_return.right -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .Ltmp18: ; CHECK-NEXT: 
movq %rbx, %rax ; CHECK-NEXT: jmp .LBB12_7 @@ -603,20 +573,18 @@ define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-ex ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: subq $40, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset %rbx, -24 ; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rsi, %rbx ; CHECK-NEXT: movl %edi, %ebp ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq consume3 ; CHECK-NEXT: .Ltmp25: ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cvtsi2sd %ebp, %xmm0 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) ; CHECK-NEXT: nopl 8(%rax,%rax) ; CHECK-NEXT: .Ltmp26: ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload @@ -625,7 +593,6 @@ define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-ex ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movss %xmm0, (%rsp) -; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) ; CHECK-NEXT: nopl 8(%rax,%rax) ; CHECK-NEXT: .Ltmp27: ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload @@ -634,7 +601,6 @@ define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-ex ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movss %xmm0, (%rsp) -; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) ; CHECK-NEXT: nopl 8(%rax,%rax) ; CHECK-NEXT: .Ltmp28: ; CHECK-NEXT: xorl %eax, %eax @@ -650,7 +616,7 @@ define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-ex ; CHECK-NEXT: movss %xmm0, (%rsp) ; CHECK-NEXT: nopl 8(%rax,%rax) ; CHECK-NEXT: .Ltmp29: -; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.mir b/llvm/test/CodeGen/X86/statepoint-vreg.mir index 36b70836b270f..933ca55eef5c5 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg.mir +++ b/llvm/test/CodeGen/X86/statepoint-vreg.mir @@ -146,7 +146,7 @@ body: | %1:gr64 = COPY $rsi %0:gr64 = COPY $rdi ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %2:gr64, %3:gr64 = STATEPOINT 2882400000, 0, 0, @bar, 2, 0, 2, 0, 2, 1, 2, 0, %1, %1(tied-def 0), %0, %0(tied-def 1), csr_64, implicit-def $rsp, implicit-def $ssp + %2:gr64, %3:gr64 = STATEPOINT 2882400000, 0, 0, @bar, 2, 0, 2, 0, 2, 1, 2, 0, 2, 2, %1(tied-def 0), %0(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp %4:gr32 = MOV32rm killed %3, 1, $noreg, 0, $noreg :: (load 4 from %ir.rel1, addrspace 1) %5:gr32 = ADD32rm %4, killed %2, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.rel2, addrspace 1) From 315970de1d7140fa689dbbe7482620f134e5d021 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Tue, 6 Oct 2020 03:41:19 -0700 Subject: [PATCH 126/321] [llvm-objcopy][MachO] Add support for universal binaries This diff adds support for universal binaries to llvm-objcopy. 
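For example, a minimal round trip might look like this (file names are
hypothetical; llvm-lipo is only used to assemble and inspect the fat input,
and the flags shown are the same ones exercised by the new test below):

  llvm-lipo in.i386 in.x86_64 -create -output in.universal
  llvm-objcopy in.universal out.universal
  llvm-lipo out.universal -archs   # prints: i386 x86_64

Each slice is copied individually, and the fat header is rebuilt around the
copied slices.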
This is a recommit of 32c8435ef70031 with the asan issue fixed.

Test plan: make check-all

Differential revision: https://reviews.llvm.org/D88400
---
 .../llvm/Object/MachOUniversalWriter.h        |  6 ++
 llvm/lib/Object/MachOUniversalWriter.cpp      |  5 ++
 .../tools/llvm-objcopy/MachO/strip-all.test   |  5 ++
 .../llvm-objcopy/MachO/universal-object.test  | 42 +++++++++++
 .../tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 73 +++++++++++++++++++
 llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h  |  4 +
 llvm/tools/llvm-objcopy/llvm-objcopy.cpp      | 26 ++++++-
 llvm/tools/llvm-objcopy/llvm-objcopy.h        | 32 ++++++++
 8 files changed, 190 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/tools/llvm-objcopy/MachO/universal-object.test
 create mode 100644 llvm/tools/llvm-objcopy/llvm-objcopy.h

diff --git a/llvm/include/llvm/Object/MachOUniversalWriter.h b/llvm/include/llvm/Object/MachOUniversalWriter.h
index 49352440dca17..606db94c9f202 100644
--- a/llvm/include/llvm/Object/MachOUniversalWriter.h
+++ b/llvm/include/llvm/Object/MachOUniversalWriter.h
@@ -43,6 +43,12 @@ class Slice {
 
   Slice(const MachOObjectFile &O, uint32_t Align);
 
+  /// This constructor takes prespecified \param CPUType, \param CPUSubType,
+  /// \param ArchName, \param Align instead of inferring them from the archive
+  /// members.
+  Slice(const Archive &A, uint32_t CPUType, uint32_t CPUSubType,
+        std::string ArchName, uint32_t Align);
+
   static Expected<Slice> create(const Archive &A,
                                 LLVMContext *LLVMCtx = nullptr);
 
diff --git a/llvm/lib/Object/MachOUniversalWriter.cpp b/llvm/lib/Object/MachOUniversalWriter.cpp
index 165964e077ce3..4bb467e56a6f9 100644
--- a/llvm/lib/Object/MachOUniversalWriter.cpp
+++ b/llvm/lib/Object/MachOUniversalWriter.cpp
@@ -75,6 +75,11 @@ static uint32_t calculateAlignment(const MachOObjectFile &ObjectFile) {
   }
 }
 
+Slice::Slice(const Archive &A, uint32_t CPUType, uint32_t CPUSubType,
+             std::string ArchName, uint32_t Align)
+    : B(&A), CPUType(CPUType), CPUSubType(CPUSubType),
+      ArchName(std::move(ArchName)), P2Alignment(Align) {}
+
 Slice::Slice(const MachOObjectFile &O, uint32_t Align)
     : B(&O), CPUType(O.getHeader().cputype),
       CPUSubType(O.getHeader().cpusubtype),
diff --git a/llvm/test/tools/llvm-objcopy/MachO/strip-all.test b/llvm/test/tools/llvm-objcopy/MachO/strip-all.test
index 4ff31f5c1e422..cb41b353ec53c 100644
--- a/llvm/test/tools/llvm-objcopy/MachO/strip-all.test
+++ b/llvm/test/tools/llvm-objcopy/MachO/strip-all.test
@@ -27,6 +27,11 @@
 # cmp %t4 %t.dwarf.stripped
 # cmp %t5 %t.dwarf.stripped
 
+# RUN: llvm-lipo %t.dwarf -create -output %t.dwarf.universal
+# RUN: llvm-strip %t.dwarf.universal -o %t.dwarf.universal.stripped
+# RUN: llvm-lipo %t.dwarf.universal.stripped -thin x86_64 -output %t6
+# RUN: cmp %t6 %t.dwarf.stripped
+
 ## Make sure that debug sections are removed.
 # DWARF: Sections [
 # DWARF-NOT: Name: __debug_str
diff --git a/llvm/test/tools/llvm-objcopy/MachO/universal-object.test b/llvm/test/tools/llvm-objcopy/MachO/universal-object.test
new file mode 100644
index 0000000000000..a6146fd56483a
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/MachO/universal-object.test
@@ -0,0 +1,42 @@
+# This test verifies that llvm-objcopy copies a universal Mach-O object file properly.
+
+# RUN: yaml2obj %p/Inputs/i386.yaml -o %t.i386
+# RUN: yaml2obj %p/Inputs/x86_64.yaml -o %t.x86_64
+
+## Case 1: copy a universal object containing regular Mach-O objects.
+# RUN: llvm-lipo %t.i386 %t.x86_64 -create -output %t.universal
+# RUN: llvm-objcopy %t.universal %t.universal.copy
+# RUN: llvm-lipo %t.universal.copy -archs | FileCheck --check-prefix=VERIFY_ARCHS %s
+# RUN: llvm-lipo %t.universal.copy -thin i386 -output %t.i386.copy
+# RUN: llvm-lipo %t.universal.copy -thin x86_64 -output %t.x86_64.copy
+# RUN: cmp %t.i386 %t.i386.copy
+# RUN: cmp %t.x86_64 %t.x86_64.copy
+
+## Case 2: copy a universal object file containing an archive.
+# RUN: rm -f %t.archive.i386
+# RUN: llvm-ar cr %t.archive.i386 %t.i386
+# RUN: llvm-lipo %t.archive.i386 %t.x86_64 -create -output %t.universal.containing.archive
+# RUN: llvm-objcopy %t.universal.containing.archive %t.universal.containing.archive.copy
+# RUN: llvm-lipo %t.universal.containing.archive.copy -archs | FileCheck --check-prefix=VERIFY_ARCHS %s
+# RUN: llvm-lipo %t.universal.containing.archive.copy -thin i386 -output %t.archive.i386.copy
+# RUN: llvm-lipo %t.universal.containing.archive.copy -thin x86_64 -output %t.archive.x86_64.copy
+# RUN: cmp %t.archive.i386 %t.archive.i386.copy
+# RUN: cmp %t.x86_64 %t.archive.x86_64.copy
+
+## Case 3: copy an archive containing a universal object.
+# RUN: llvm-ar cr %t.archive.containing.universal %t.universal
+# RUN: llvm-objcopy %t.archive.containing.universal %t.archive.containing.universal.copy
+
+## Case 4: try to copy a universal object file containing a bitcode slice.
+# RUN: echo 'target triple = "arm64-apple-ios8.0.0"' | llvm-as -o %t.bitcode
+# RUN: llvm-lipo %t.bitcode %t.x86_64 -create -output %t.universal.containing.bitcode
+# RUN: not llvm-objcopy %t.universal.containing.bitcode %t.universal.containing.bitcode.copy 2>&1 \
+# RUN: | FileCheck --check-prefix=UNSUPPORTED_UNIVERSAL_OBJECT %s
+
+## Case 5: try to copy an archive containing an unsupported universal object.
+# RUN: llvm-ar cr %t.archive.universal.bitcode %t.universal.containing.bitcode
+# RUN: not llvm-objcopy %t.archive.universal.bitcode %t.archive.universal.bitcode.copy 2>&1 \
+# RUN: | FileCheck --check-prefix=UNSUPPORTED_UNIVERSAL_OBJECT %s
+
+# VERIFY_ARCHS: i386 x86_64
+# UNSUPPORTED_UNIVERSAL_OBJECT: slice for 'arm64' of the universal Mach-O binary {{.*}} is not a Mach-O object or an archive
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 47a08d33002af..337c448f6dfd0 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -8,9 +8,13 @@
 
 #include "MachOObjcopy.h"
 #include "../CopyConfig.h"
+#include "../llvm-objcopy.h"
 #include "MachOReader.h"
 #include "MachOWriter.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/MachOUniversalWriter.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 
@@ -386,6 +390,75 @@ Error executeObjcopyOnBinary(const CopyConfig &Config,
   return Writer.write();
 }
 
+Error executeObjcopyOnMachOUniversalBinary(CopyConfig &Config,
+                                           const MachOUniversalBinary &In,
+                                           Buffer &Out) {
+  SmallVector<OwningBinary<Binary>, 2> Binaries;
+  SmallVector<Slice, 2> Slices;
+  for (const auto &O : In.objects()) {
+    Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
+    if (ArOrErr) {
+      Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
+          createNewArchiveMembers(Config, **ArOrErr);
+      if (!NewArchiveMembersOrErr)
+        return NewArchiveMembersOrErr.takeError();
+      Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
+          writeArchiveToBuffer(*NewArchiveMembersOrErr,
+                               (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(),
+                               Config.DeterministicArchives,
+                               (*ArOrErr)->isThin());
+      if (!OutputBufferOrErr)
+        return OutputBufferOrErr.takeError();
+      Expected<std::unique_ptr<Binary>> BinaryOrErr =
+          object::createBinary(**OutputBufferOrErr);
+      if (!BinaryOrErr)
+        return BinaryOrErr.takeError();
+      Binaries.emplace_back(std::move(*BinaryOrErr),
+                            std::move(*OutputBufferOrErr));
+      Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
+                          O.getCPUType(), O.getCPUSubType(),
+                          O.getArchFlagName(), O.getAlign());
+      continue;
+    }
+    // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
+    // ObjectForArch return an Error in case of a type mismatch. We need to
+    // check each in turn to see what kind of slice this is, so ignore errors
+    // produced along the way.
+    consumeError(ArOrErr.takeError());
+
+    Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
+    if (!ObjOrErr) {
+      consumeError(ObjOrErr.takeError());
+      return createStringError(std::errc::invalid_argument,
+                               "slice for '%s' of the universal Mach-O binary "
+                               "'%s' is not a Mach-O object or an archive",
+                               O.getArchFlagName().c_str(),
+                               Config.InputFilename.str().c_str());
+    }
+    std::string ArchFlagName = O.getArchFlagName();
+    MemBuffer MB(ArchFlagName);
+    if (Error E = executeObjcopyOnBinary(Config, **ObjOrErr, MB))
+      return E;
+    std::unique_ptr<MemoryBuffer> OutputBuffer =
+        MB.releaseMemoryBuffer();
+    Expected<std::unique_ptr<Binary>> BinaryOrErr =
+        object::createBinary(*OutputBuffer);
+    if (!BinaryOrErr)
+      return BinaryOrErr.takeError();
+    Binaries.emplace_back(std::move(*BinaryOrErr), std::move(OutputBuffer));
+    Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
+                        O.getAlign());
+  }
+  Expected<std::unique_ptr<MemoryBuffer>> B =
+      writeUniversalBinaryToBuffer(Slices);
+  if (!B)
+    return B.takeError();
+  if (Error E = Out.allocate((*B)->getBufferSize()))
+    return E;
+  memcpy(Out.getBufferStart(), (*B)->getBufferStart(), (*B)->getBufferSize());
+  return Out.commit();
+}
+
 } // end namespace macho
 } // end namespace objcopy
 } // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
index f34e361db7ea9..c3f5391f79b6a 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
@@ -24,6 +24,10 @@ class Buffer;
 namespace macho {
 Error executeObjcopyOnBinary(const CopyConfig &Config,
                              object::MachOObjectFile &In, Buffer &Out);
+
+Error executeObjcopyOnMachOUniversalBinary(
+    CopyConfig &Config, const object::MachOUniversalBinary &In, Buffer &Out);
+
 } // end namespace macho
 } // end namespace objcopy
 } // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index 175f2929eb230..e1f1ed8f3cc1b 100644
--- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Object/ELFTypes.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/Wasm.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
@@ -144,6 +145,10 @@ static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In,
     return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out);
   else if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In))
     return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out);
+  else if (auto *MachOUniversalBinary =
+               dyn_cast<object::MachOUniversalBinary>(&In))
+    return macho::executeObjcopyOnMachOUniversalBinary(
+        Config, *MachOUniversalBinary, Out);
   else if (auto *WasmBinary = dyn_cast<object::WasmObjectFile>(&In))
     return objcopy::wasm::executeObjcopyOnBinary(Config, *WasmBinary, Out);
   else
@@ -151,7 +156,11 @@ static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In,
                              "unsupported object file format");
 }
 
-static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
+namespace llvm {
+namespace objcopy {
+
+Expected<std::vector<NewArchiveMember>>
+createNewArchiveMembers(CopyConfig &Config, const Archive &Ar) {
   std::vector<NewArchiveMember> NewArchiveMembers;
   Error Err = Error::success();
   for (const Archive::Child &Child : Ar.children(Err)) {
@@ -166,7 +175,7 @@ static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
 
     MemBuffer MB(ChildNameOrErr.get());
     if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MB))
-      return E;
+      return std::move(E);
 
     Expected<NewArchiveMember> Member =
        NewArchiveMember::getOldMember(Child, Config.DeterministicArchives);
@@ -178,8 +187,19 @@ static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
   }
   if (Err)
     return createFileError(Config.InputFilename, std::move(Err));
+  return std::move(NewArchiveMembers);
+}
+
+} // end namespace objcopy
+} // end namespace llvm
 
-  return deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
+static Error executeObjcopyOnArchive(CopyConfig &Config,
+                                     const object::Archive &Ar) {
+  Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
+      createNewArchiveMembers(Config, Ar);
+  if (!NewArchiveMembersOrErr)
+    return NewArchiveMembersOrErr.takeError();
+  return deepWriteArchive(Config.OutputFilename, *NewArchiveMembersOrErr,
                           Ar.hasSymbolTable(), Ar.kind(),
                           Config.DeterministicArchives, Ar.isThin());
 }
diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.h b/llvm/tools/llvm-objcopy/llvm-objcopy.h
new file mode 100644
index 0000000000000..97a166769f954
--- /dev/null
+++ b/llvm/tools/llvm-objcopy/llvm-objcopy.h
@@ -0,0 +1,32 @@
+//===- llvm-objcopy.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_OBJCOPY_H
+#define LLVM_TOOLS_OBJCOPY_OBJCOPY_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+struct NewArchiveMember;
+
+namespace object {
+
+class Archive;
+
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+Expected<std::vector<NewArchiveMember>>
+createNewArchiveMembers(CopyConfig &Config, const object::Archive &Ar);
+
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_OBJCOPY_H

From d6c9dc3c17e444e007758c01507bb5280532c9f8 Mon Sep 17 00:00:00 2001
From: Adam Balogh
Date: Tue, 6 Oct 2020 09:25:58 +0200
Subject: [PATCH 127/321] [clang-tidy] Remove obsolete checker google-runtime-references

The rule this checker is based on was removed from the
//Google C++ Style Guide// in May:
[[ https://github.com/google/styleguide/pull/553 | Update C++ styleguide ]].
This checker is now obsolete.
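For reference, a minimal sketch of the pattern the removed check used to
diagnose (hypothetical declarations, mirroring the deleted test file):

  void bump(int &counter);      // was flagged: make it const or use a pointer
  void bump(int *counter);      // the alternative the check suggested
  void read(const int &value);  // const references were never flagged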
Differential Revision: https://reviews.llvm.org/D88831 --- .../clang-tidy/google/CMakeLists.txt | 1 - .../clang-tidy/google/GoogleTidyModule.cpp | 3 - .../clang-tidy/google/NonConstReferences.cpp | 148 ----------------- .../clang-tidy/google/NonConstReferences.h | 41 ----- clang-tools-extra/docs/ReleaseNotes.rst | 3 + .../checks/google-runtime-references.rst | 17 -- .../checkers/google-runtime-references.cpp | 155 ------------------ 7 files changed, 3 insertions(+), 365 deletions(-) delete mode 100644 clang-tools-extra/clang-tidy/google/NonConstReferences.cpp delete mode 100644 clang-tools-extra/clang-tidy/google/NonConstReferences.h delete mode 100644 clang-tools-extra/docs/clang-tidy/checks/google-runtime-references.rst delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/google-runtime-references.cpp diff --git a/clang-tools-extra/clang-tidy/google/CMakeLists.txt b/clang-tools-extra/clang-tidy/google/CMakeLists.txt index e38ba8abb78d3..e1e5fc7a85a7d 100644 --- a/clang-tools-extra/clang-tidy/google/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/google/CMakeLists.txt @@ -16,7 +16,6 @@ add_clang_library(clangTidyGoogleModule GlobalVariableDeclarationCheck.cpp GoogleTidyModule.cpp IntegerTypesCheck.cpp - NonConstReferences.cpp OverloadedUnaryAndCheck.cpp TodoCommentCheck.cpp UnnamedNamespaceInHeaderCheck.cpp diff --git a/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp b/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp index 4c6b791f8cdd1..d8fcf3fed94fd 100644 --- a/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp @@ -23,7 +23,6 @@ #include "GlobalNamesInHeadersCheck.h" #include "GlobalVariableDeclarationCheck.h" #include "IntegerTypesCheck.h" -#include "NonConstReferences.h" #include "OverloadedUnaryAndCheck.h" #include "TodoCommentCheck.h" #include "UnnamedNamespaceInHeaderCheck.h" @@ -63,8 +62,6 @@ class GoogleModule : public ClangTidyModule { "google-runtime-int"); CheckFactories.registerCheck( "google-runtime-operator"); - CheckFactories.registerCheck( - "google-runtime-references"); CheckFactories .registerCheck( "google-readability-avoid-underscore-in-googletest-name"); diff --git a/clang-tools-extra/clang-tidy/google/NonConstReferences.cpp b/clang-tools-extra/clang-tidy/google/NonConstReferences.cpp deleted file mode 100644 index e0fb614dfe8be..0000000000000 --- a/clang-tools-extra/clang-tidy/google/NonConstReferences.cpp +++ /dev/null @@ -1,148 +0,0 @@ -//===--- NonConstReferences.cpp - clang-tidy --------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "NonConstReferences.h" -#include "../utils/OptionsUtils.h" -#include "clang/AST/DeclBase.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" - -using namespace clang::ast_matchers; - -namespace clang { -namespace tidy { -namespace google { -namespace runtime { - -NonConstReferences::NonConstReferences(StringRef Name, - ClangTidyContext *Context) - : ClangTidyCheck(Name, Context), - IncludedTypes( - utils::options::parseStringList(Options.get("IncludedTypes", ""))) {} - -void NonConstReferences::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludedTypes", - utils::options::serializeStringList(IncludedTypes)); -} - -void NonConstReferences::registerMatchers(MatchFinder *Finder) { - Finder->addMatcher( - parmVarDecl( - unless(isInstantiated()), - hasType(references( - qualType(unless(isConstQualified())).bind("referenced_type"))), - unless(hasType(rValueReferenceType()))) - .bind("param"), - this); -} - -void NonConstReferences::check(const MatchFinder::MatchResult &Result) { - const auto *Parameter = Result.Nodes.getNodeAs("param"); - const auto *Function = - dyn_cast_or_null(Parameter->getParentFunctionOrMethod()); - - if (Function == nullptr || Function->isImplicit()) - return; - - if (Function->getLocation().isMacroID()) - return; - - if (!Function->isCanonicalDecl()) - return; - - if (const auto *Method = dyn_cast(Function)) { - // Don't warn on implementations of an interface using references. - if (Method->begin_overridden_methods() != Method->end_overridden_methods()) - return; - // Don't warn on lambdas, as they frequently have to conform to the - // interface defined elsewhere. - if (Method->getParent()->isLambda()) - return; - } - - auto ReferencedType = *Result.Nodes.getNodeAs("referenced_type"); - - if (std::find_if(IncludedTypes.begin(), IncludedTypes.end(), - [&](llvm::StringRef ExplicitType) { - return ReferencedType.getCanonicalType().getAsString( - Result.Context->getPrintingPolicy()) == - ExplicitType; - }) != IncludedTypes.end()) - return; - - // Don't warn on function references, they shouldn't be constant. - if (ReferencedType->isFunctionProtoType()) - return; - - // Don't warn on dependent types in templates. - if (ReferencedType->isDependentType()) - return; - - if (Function->isOverloadedOperator()) { - switch (Function->getOverloadedOperator()) { - case clang::OO_LessLess: - case clang::OO_PlusPlus: - case clang::OO_MinusMinus: - case clang::OO_PlusEqual: - case clang::OO_MinusEqual: - case clang::OO_StarEqual: - case clang::OO_SlashEqual: - case clang::OO_PercentEqual: - case clang::OO_LessLessEqual: - case clang::OO_GreaterGreaterEqual: - case clang::OO_PipeEqual: - case clang::OO_CaretEqual: - case clang::OO_AmpEqual: - // Don't warn on the first parameter of operator<<(Stream&, ...), - // operator++, operator-- and operation+assignment operators. - if (Function->getParamDecl(0) == Parameter) - return; - break; - case clang::OO_GreaterGreater: { - auto isNonConstRef = [](clang::QualType T) { - return T->isReferenceType() && - !T.getNonReferenceType().isConstQualified(); - }; - // Don't warn on parameters of stream extractors: - // Stream& operator>>(Stream&, Value&); - // Both parameters should be non-const references by convention. - if (isNonConstRef(Function->getParamDecl(0)->getType()) && - (Function->getNumParams() < 2 || // E.g. 
member operator>>. - isNonConstRef(Function->getParamDecl(1)->getType())) && - isNonConstRef(Function->getReturnType())) - return; - break; - } - default: - break; - } - } - - // Some functions use references to comply with established standards. - if (Function->getDeclName().isIdentifier() && Function->getName() == "swap") - return; - - // iostream parameters are typically passed by non-const reference. - if (StringRef(ReferencedType.getAsString()).endswith("stream")) - return; - - if (Parameter->getName().empty()) { - diag(Parameter->getLocation(), "non-const reference parameter at index %0, " - "make it const or use a pointer") - << Parameter->getFunctionScopeIndex(); - } else { - diag(Parameter->getLocation(), - "non-const reference parameter %0, make it const or use a pointer") - << Parameter; - } -} - -} // namespace runtime -} // namespace google -} // namespace tidy -} // namespace clang diff --git a/clang-tools-extra/clang-tidy/google/NonConstReferences.h b/clang-tools-extra/clang-tidy/google/NonConstReferences.h deleted file mode 100644 index ad7d4e4b1bca0..0000000000000 --- a/clang-tools-extra/clang-tidy/google/NonConstReferences.h +++ /dev/null @@ -1,41 +0,0 @@ -//===--- NonConstReferences.h - clang-tidy ----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_NON_CONST_REFERENCES_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_NON_CONST_REFERENCES_H - -#include "../ClangTidyCheck.h" - -namespace clang { -namespace tidy { -namespace google { -namespace runtime { - -/// Checks the usage of non-constant references in function parameters. -/// -/// https://google.github.io/styleguide/cppguide.html#Reference_Arguments -class NonConstReferences : public ClangTidyCheck { -public: - NonConstReferences(StringRef Name, ClangTidyContext *Context); - bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { - return LangOpts.CPlusPlus; - } - void registerMatchers(ast_matchers::MatchFinder *Finder) override; - void check(const ast_matchers::MatchFinder::MatchResult &Result) override; - void storeOptions(ClangTidyOptions::OptionMap &Opts) override; - -private: - const std::vector IncludedTypes; -}; - -} // namespace runtime -} // namespace google -} // namespace tidy -} // namespace clang - -#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_NON_CONST_REFERENCES_H diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index ac4802e6d498c..1df398c01cabe 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -120,6 +120,9 @@ Changes in existing checks Added an option `GetConfigPerFile` to support including files which use different naming styles. +- Removed `google-runtime-references` check because the rule it checks does + not exist in the Google Style Guide anymore. + Improvements to include-fixer ----------------------------- diff --git a/clang-tools-extra/docs/clang-tidy/checks/google-runtime-references.rst b/clang-tools-extra/docs/clang-tidy/checks/google-runtime-references.rst deleted file mode 100644 index 52de1f1087389..0000000000000 --- a/clang-tools-extra/docs/clang-tidy/checks/google-runtime-references.rst +++ /dev/null @@ -1,17 +0,0 @@ -.. 
title:: clang-tidy - google-runtime-references - -google-runtime-references -========================= - -Checks the usage of non-constant references in function parameters. - -The corresponding style guide rule: -https://google.github.io/styleguide/cppguide.html#Reference_Arguments - - -Options -------- - -.. option:: IncludedTypes - - A semicolon-separated list of names of types to explicitly include. Default is empty. diff --git a/clang-tools-extra/test/clang-tidy/checkers/google-runtime-references.cpp b/clang-tools-extra/test/clang-tidy/checkers/google-runtime-references.cpp deleted file mode 100644 index e70ec3aeddf04..0000000000000 --- a/clang-tools-extra/test/clang-tidy/checkers/google-runtime-references.cpp +++ /dev/null @@ -1,155 +0,0 @@ -// RUN: %check_clang_tidy %s google-runtime-references %t -- \ -// RUN: -config="{CheckOptions: \ -// RUN: [{key: google-runtime-references.IncludedTypes, \ -// RUN: value: 'included::A; included::B'}]}" - -int a; -int &b = a; -int *c; -void f1(int a); -void f2(int *b); -void f3(const int &c); -void f4(int const &d); - -// Don't warn on implicit operator= in c++11 mode. -class A { - virtual void f() {} -}; -// Don't warn on rvalue-references. -struct A2 { - A2(A2&&) = default; - void f(A2&&) {} -}; - -// Don't warn on iostream parameters. -namespace xxx { -class istream { }; -class ostringstream { }; -} -void g1(xxx::istream &istr); -void g1(xxx::ostringstream &istr); - -void g1(int &a); -// CHECK-MESSAGES: [[@LINE-1]]:14: warning: non-const reference parameter 'a', make it const or use a pointer [google-runtime-references] - -struct s {}; -void g2(int a, int b, s c, s &d); -// CHECK-MESSAGES: [[@LINE-1]]:31: warning: non-const reference parameter 'd', {{.*}} - -typedef int &ref; -void g3(ref a); -// CHECK-MESSAGES: [[@LINE-1]]:13: warning: non-const reference {{.*}} - -void g4(int &a, int &b, int &); -// CHECK-MESSAGES: [[@LINE-1]]:14: warning: non-const reference parameter 'a', {{.*}} -// CHECK-MESSAGES: [[@LINE-2]]:22: warning: non-const reference parameter 'b', {{.*}} -// CHECK-MESSAGES: [[@LINE-3]]:30: warning: non-const reference parameter at index 2, {{.*}} - -class B { - B(B& a) {} -// CHECK-MESSAGES: [[@LINE-1]]:8: warning: non-const reference {{.*}} - virtual void f(int &a) {} -// CHECK-MESSAGES: [[@LINE-1]]:23: warning: non-const reference {{.*}} - void g(int &b); -// CHECK-MESSAGES: [[@LINE-1]]:15: warning: non-const reference {{.*}} - - // Don't warn on the parameter of stream extractors defined as members. - B& operator>>(int& val) { return *this; } -}; - -// Only warn on the first declaration of each function to reduce duplicate -// warnings. -void B::g(int &b) {} - -// Don't warn on the first parameter of stream inserters. -A& operator<<(A& s, int&) { return s; } -// CHECK-MESSAGES: [[@LINE-1]]:25: warning: non-const reference parameter at index 1, {{.*}} - -// Don't warn on either parameter of stream extractors. Both need to be -// non-const references by convention. -A& operator>>(A& input, int& val) { return input; } - -// Don't warn on lambdas. -auto lambda = [] (int&) {}; - -// Don't warn on typedefs, as we'll warn on the function itself. -typedef int (*fp)(int &); - -// Don't warn on function references. -typedef void F(); -void g5(const F& func) {} -void g6(F& func) {} - -template -void g7(const T& t) {} - -template -void g8(T t) {} - -void f5() { - g5(f5); - g6(f5); - g7(f5); - g7(f5); - g8(f5); - g8(f5); -} - -// Don't warn on dependent types. 
-template -void g9(T& t) {} -template -void g10(T t) {} - -void f6() { - int i; - float f; - g9(i); - g9(i); - g9(i); - g10(i); - g10(f); -} - -// Warn only on the overridden methods from the base class, as the child class -// only implements the interface. -class C : public B { - C(); - virtual void f(int &a) {} -}; - -// Don't warn on operator<< with streams-like interface. -A& operator<<(A& s, int) { return s; } - -// Don't warn on swap(). -void swap(C& c1, C& c2) {} - -// Don't warn on standalone operator++, operator--, operator+=, operator-=, -// operator*=, etc. that all need non-const references to be functional. -A& operator++(A& a) { return a; } -A operator++(A& a, int) { return a; } -A& operator--(A& a) { return a; } -A operator--(A& a, int) { return a; } -A& operator+=(A& a, const A& b) { return a; } -A& operator-=(A& a, const A& b) { return a; } -A& operator*=(A& a, const A& b) { return a; } -A& operator/=(A& a, const A& b) { return a; } -A& operator%=(A& a, const A& b) { return a; } -A& operator<<=(A& a, const A& b) { return a; } -A& operator>>=(A& a, const A& b) { return a; } -A& operator|=(A& a, const A& b) { return a; } -A& operator^=(A& a, const A& b) { return a; } -A& operator&=(A& a, const A& b) { return a; } - -namespace included { -class A {}; -class B {}; -void f7(A &); -void f8(B &); -} -void f9(included::A &); -void f10(included::B &); - -#define DEFINE_F(name) void name(int& a) - -DEFINE_F(func) {} From 95429b88a469ee6537f15c82aec9f83e699b8b02 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 6 Oct 2020 12:02:07 +0000 Subject: [PATCH 128/321] [gn build] Port d6c9dc3c17e --- .../gn/secondary/clang-tools-extra/clang-tidy/google/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/google/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/google/BUILD.gn index 204a25c0eb660..51beb9fec77bb 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/google/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/google/BUILD.gn @@ -24,7 +24,6 @@ static_library("google") { "GlobalVariableDeclarationCheck.cpp", "GoogleTidyModule.cpp", "IntegerTypesCheck.cpp", - "NonConstReferences.cpp", "OverloadedUnaryAndCheck.cpp", "TodoCommentCheck.cpp", "UnnamedNamespaceInHeaderCheck.cpp", From 7bbb65b0a431554ee74b875aec77d40f5c387596 Mon Sep 17 00:00:00 2001 From: Alexey Lapshin Date: Tue, 6 Oct 2020 14:55:45 +0300 Subject: [PATCH 129/321] [llvm-objcopy][NFC] fix style issues reported by clang-format. 
--- llvm/tools/llvm-objcopy/COFF/Object.h | 2 +- llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp | 24 +++++++++---------- llvm/tools/llvm-objcopy/ELF/Object.cpp | 27 ++++++++++------------ llvm/tools/llvm-objcopy/ELF/Object.h | 13 +++++++---- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/llvm/tools/llvm-objcopy/COFF/Object.h b/llvm/tools/llvm-objcopy/COFF/Object.h index 31233783a90a0..0e854b58cbdba 100644 --- a/llvm/tools/llvm-objcopy/COFF/Object.h +++ b/llvm/tools/llvm-objcopy/COFF/Object.h @@ -26,7 +26,7 @@ namespace coff { struct Relocation { Relocation() = default; - Relocation(const object::coff_relocation& R) : Reloc(R) {} + Relocation(const object::coff_relocation &R) : Reloc(R) {} object::coff_relocation Reloc; size_t Target = 0; diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp index 5a34153647ccd..f0bb594b499c1 100644 --- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp @@ -184,10 +184,9 @@ findBuildID(const CopyConfig &Config, const object::ELFFile &In) { return createFileError(Config.InputFilename, std::move(Err)); } - return createFileError( - Config.InputFilename, - createStringError(llvm::errc::invalid_argument, - "could not find build ID")); + return createFileError(Config.InputFilename, + createStringError(llvm::errc::invalid_argument, + "could not find build ID")); } static Expected> @@ -205,7 +204,8 @@ findBuildID(const CopyConfig &Config, const object::ELFObjectFileBase &In) { } template -static Error makeStringError(std::error_code EC, const Twine &Msg, Ts &&... Args) { +static Error makeStringError(std::error_code EC, const Twine &Msg, + Ts &&... Args) { std::string FullMsg = (EC.message() + ": " + Msg).str(); return createStringError(EC, FullMsg.c_str(), std::forward(Args)...); } @@ -318,20 +318,20 @@ static bool isCompressable(const SectionBase &Sec) { static Error replaceDebugSections( Object &Obj, SectionPred &RemovePred, - function_ref shouldReplace, - function_ref(const SectionBase *)> addSection) { + function_ref ShouldReplace, + function_ref(const SectionBase *)> AddSection) { // Build a list of the debug sections we are going to replace. // We can't call `AddSection` while iterating over sections, // because it would mutate the sections array. SmallVector ToReplace; for (auto &Sec : Obj.sections()) - if (shouldReplace(Sec)) + if (ShouldReplace(Sec)) ToReplace.push_back(&Sec); // Build a mapping from original section to a new one. 
DenseMap FromTo; for (SectionBase *S : ToReplace) { - Expected NewSection = addSection(S); + Expected NewSection = AddSection(S); if (!NewSection) return NewSection.takeError(); @@ -344,8 +344,8 @@ static Error replaceDebugSections( for (auto &Sec : Obj.sections()) Sec.replaceSectionReferences(FromTo); - RemovePred = [shouldReplace, RemovePred](const SectionBase &Sec) { - return shouldReplace(Sec) || RemovePred(Sec); + RemovePred = [ShouldReplace, RemovePred](const SectionBase &Sec) { + return ShouldReplace(Sec) || RemovePred(Sec); }; return Error::success(); @@ -792,7 +792,7 @@ Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In, return Obj.takeError(); const ElfType OutputElfType = - getOutputElfType(Config.OutputArch.getValueOr(MachineInfo())); + getOutputElfType(Config.OutputArch.getValueOr(MachineInfo())); if (Error E = handleArgs(Config, **Obj, Reader, OutputElfType)) return E; return writeOutput(Config, **Obj, Out, OutputElfType); diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp index 5eadb5c683bd9..b0315d6fdc463 100644 --- a/llvm/tools/llvm-objcopy/ELF/Object.cpp +++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp @@ -51,12 +51,11 @@ template void ELFWriter::writePhdr(const Segment &Seg) { } Error SectionBase::removeSectionReferences( - bool AllowBrokenLinks, - function_ref ToRemove) { + bool, function_ref) { return Error::success(); } -Error SectionBase::removeSymbols(function_ref ToRemove) { +Error SectionBase::removeSymbols(function_ref) { return Error::success(); } @@ -751,8 +750,7 @@ void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type, } Error SymbolTableSection::removeSectionReferences( - bool AllowBrokenLinks, - function_ref ToRemove) { + bool AllowBrokenLinks, function_ref ToRemove) { if (ToRemove(SectionIndexTable)) SectionIndexTable = nullptr; if (ToRemove(SymbolNames)) { @@ -896,8 +894,7 @@ Error SymbolTableSection::accept(MutableSectionVisitor &Visitor) { } Error RelocationSection::removeSectionReferences( - bool AllowBrokenLinks, - function_ref ToRemove) { + bool AllowBrokenLinks, function_ref ToRemove) { if (ToRemove(Symbols)) { if (!AllowBrokenLinks) return createStringError( @@ -962,7 +959,7 @@ void RelocSectionWithSymtabBase::finalize() { } template -static void setAddend(Elf_Rel_Impl &Rel, uint64_t Addend) {} +static void setAddend(Elf_Rel_Impl &, uint64_t) {} template static void setAddend(Elf_Rel_Impl &Rela, uint64_t Addend) { @@ -1607,7 +1604,7 @@ Error ELFBuilder::initSymbolTable(SymbolTableSection *SymTab) { } template -static void getAddend(uint64_t &ToSet, const Elf_Rel_Impl &Rel) {} +static void getAddend(uint64_t &, const Elf_Rel_Impl &) {} template static void getAddend(uint64_t &ToSet, const Elf_Rel_Impl &Rela) { @@ -2099,8 +2096,8 @@ ELFWriter::ELFWriter(Object &Obj, Buffer &Buf, bool WSH, : Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs), OnlyKeepDebug(OnlyKeepDebug) {} -Error Object::removeSections(bool AllowBrokenLinks, - std::function ToRemove) { +Error Object::removeSections( + bool AllowBrokenLinks, std::function ToRemove) { auto Iter = std::stable_partition( std::begin(Sections), std::end(Sections), [=](const SecPtr &Sec) { @@ -2136,8 +2133,8 @@ Error Object::removeSections(bool AllowBrokenLinks, // a live section critically depends on a section being removed somehow // (e.g. the removed section is referenced by a relocation). 
for (auto &KeepSec : make_range(std::begin(Sections), Iter)) { - if (Error E = KeepSec->removeSectionReferences(AllowBrokenLinks, - [&RemoveSections](const SectionBase *Sec) { + if (Error E = KeepSec->removeSectionReferences( + AllowBrokenLinks, [&RemoveSections](const SectionBase *Sec) { return RemoveSections.find(Sec) != RemoveSections.end(); })) return E; @@ -2623,8 +2620,8 @@ Error IHexWriter::checkSection(const SectionBase &Sec) { if (addressOverflows32bit(Addr) || addressOverflows32bit(Addr + Sec.Size - 1)) return createStringError( errc::invalid_argument, - "Section '%s' address range [0x%llx, 0x%llx] is not 32 bit", Sec.Name.c_str(), - Addr, Addr + Sec.Size - 1); + "Section '%s' address range [0x%llx, 0x%llx] is not 32 bit", + Sec.Name.c_str(), Addr, Addr + Sec.Size - 1); return Error::success(); } diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h index 8fee4e29e964d..81851813a3e79 100644 --- a/llvm/tools/llvm-objcopy/ELF/Object.h +++ b/llvm/tools/llvm-objcopy/ELF/Object.h @@ -484,7 +484,8 @@ class Section : public SectionBase { Error accept(SectionVisitor &Visitor) const override; Error accept(MutableSectionVisitor &Visitor) override; - Error removeSectionReferences(bool AllowBrokenLinks, + Error removeSectionReferences( + bool AllowBrokenLinks, function_ref ToRemove) override; Error initialize(SectionTableRef SecTable) override; void finalize() override; @@ -647,13 +648,13 @@ class SectionIndexSection : public SectionBase { virtual ~SectionIndexSection() {} void addIndex(uint32_t Index) { assert(Size > 0); - Indexes.push_back(Index); + Indexes.push_back(Index); } void reserve(size_t NumSymbols) { Indexes.reserve(NumSymbols); Size = NumSymbols * 4; - } + } void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; } Error initialize(SectionTableRef SecTable) override; void finalize() override; @@ -700,7 +701,8 @@ class SymbolTableSection : public SectionBase { Expected getSymbolByIndex(uint32_t Index); void updateSymbols(function_ref Callable); - Error removeSectionReferences(bool AllowBrokenLinks, + Error removeSectionReferences( + bool AllowBrokenLinks, function_ref ToRemove) override; Error initialize(SectionTableRef SecTable) override; void finalize() override; @@ -770,7 +772,8 @@ class RelocationSection void addRelocation(Relocation Rel) { Relocations.push_back(Rel); } Error accept(SectionVisitor &Visitor) const override; Error accept(MutableSectionVisitor &Visitor) override; - Error removeSectionReferences(bool AllowBrokenLinks, + Error removeSectionReferences( + bool AllowBrokenLinks, function_ref ToRemove) override; Error removeSymbols(function_ref ToRemove) override; void markSymbols() override; From 0b402e985e41e8c01768a0d026adbc25c2274744 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 6 Oct 2020 12:50:04 +0100 Subject: [PATCH 130/321] [InstCombine] FoldShiftByConstant - remove unnecessary cast<>. NFC. 
Op1 is already a Constant* --- llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index b02e4eaa3ceef..88357922fd18a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -693,8 +693,7 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, if (TrOp && I.isLogicalShift() && TrOp->isShift() && isa(TrOp->getOperand(1))) { // Okay, we'll do this xform. Make the shift of shift. - Constant *ShAmt = - ConstantExpr::getZExt(cast(Op1), TrOp->getType()); + Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); // (shift2 (shift1 & 0x00FF), c2) Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName()); From 21100f885d5bab4105ed2630b480b03d466f3da9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 6 Oct 2020 13:12:47 +0100 Subject: [PATCH 131/321] [InstCombine] FoldShiftByConstant - use PatternMatch for logicalshift(trunc(shift(x,c1)),c2) fold. NFCI. --- .../Transforms/InstCombine/InstCombineShifts.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 88357922fd18a..6e12f8011a360 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -683,17 +683,19 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, return FoldedShift; // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) - if (TruncInst *TI = dyn_cast(Op0)) { - Instruction *TrOp = dyn_cast(TI->getOperand(0)); + if (auto *TI = dyn_cast(Op0)) { // If 'shift2' is an ashr, we would have to get the sign bit into a funny // place. Don't try to do this transformation in this case. Also, we // require that the input operand is a shift-by-constant so that we have // confidence that the shifts will get folded together. We could do this // xform in more cases, but it is unlikely to be profitable. - if (TrOp && I.isLogicalShift() && TrOp->isShift() && - isa(TrOp->getOperand(1))) { + if (I.isLogicalShift() && + match(TI->getOperand(0), m_Shift(m_Value(), m_ConstantInt()))) { + auto *TrOp = cast(TI->getOperand(0)); + Type *SrcTy = TrOp->getType(); + // Okay, we'll do this xform. Make the shift of shift. - Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); + Constant *ShAmt = ConstantExpr::getZExt(Op1, SrcTy); // (shift2 (shift1 & 0x00FF), c2) Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName()); @@ -701,7 +703,7 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, // part of the register be zeros. Emulate this by inserting an AND to // clear the top bits as needed. This 'and' will usually be zapped by // other xforms later if dead. 
- unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); + unsigned SrcSize = SrcTy->getScalarSizeInBits(); unsigned DstSize = TI->getType()->getScalarSizeInBits(); APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); From a825eaa90e2419e3e9ae64ec047440ce98e7bacb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Mon, 5 Oct 2020 23:07:00 +0200 Subject: [PATCH 132/321] [lldb] [Platform] Move common ::DebugProcess() to PlatformPOSIX Move common ::DebugProcess() implementation shared by Linux and NetBSD (and to be shared by FreeBSD shortly) into PlatformPOSIX, and move the old base implementation used only by Darwin to PlatformDarwin. Differential Revision: https://reviews.llvm.org/D88852 --- .../Plugins/Platform/Linux/PlatformLinux.cpp | 115 ------------------ .../Plugins/Platform/Linux/PlatformLinux.h | 4 - .../Platform/MacOSX/PlatformDarwin.cpp | 27 ++++ .../Plugins/Platform/MacOSX/PlatformDarwin.h | 6 + .../Platform/NetBSD/PlatformNetBSD.cpp | 115 ------------------ .../Plugins/Platform/NetBSD/PlatformNetBSD.h | 4 - .../Plugins/Platform/POSIX/PlatformPOSIX.cpp | 111 +++++++++++++++-- 7 files changed, 134 insertions(+), 248 deletions(-) diff --git a/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp b/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp index 577aa77c2ece9..a12aa1a716be4 100644 --- a/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp +++ b/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp @@ -262,121 +262,6 @@ bool PlatformLinux::CanDebugProcess() { } } -// For local debugging, Linux will override the debug logic to use llgs-launch -// rather than lldb-launch, llgs-attach. This differs from current lldb- -// launch, debugserver-attach approach on MacOSX. -lldb::ProcessSP -PlatformLinux::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger, - Target *target, // Can be NULL, if NULL create a new - // target, else use existing one - Status &error) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM)); - LLDB_LOG(log, "target {0}", target); - - // If we're a remote host, use standard behavior from parent class. - if (!IsHost()) - return PlatformPOSIX::DebugProcess(launch_info, debugger, target, error); - - // - // For local debugging, we'll insist on having ProcessGDBRemote create the - // process. - // - - ProcessSP process_sp; - - // Make sure we stop at the entry point - launch_info.GetFlags().Set(eLaunchFlagDebug); - - // We always launch the process we are going to debug in a separate process - // group, since then we can handle ^C interrupts ourselves w/o having to - // worry about the target getting them as well. - launch_info.SetLaunchInSeparateProcessGroup(true); - - // Ensure we have a target. - if (target == nullptr) { - LLDB_LOG(log, "creating new target"); - TargetSP new_target_sp; - error = debugger.GetTargetList().CreateTarget( - debugger, "", "", eLoadDependentsNo, nullptr, new_target_sp); - if (error.Fail()) { - LLDB_LOG(log, "failed to create new target: {0}", error); - return process_sp; - } - - target = new_target_sp.get(); - if (!target) { - error.SetErrorString("CreateTarget() returned nullptr"); - LLDB_LOG(log, "error: {0}", error); - return process_sp; - } - } - - // Mark target as currently selected target. - debugger.GetTargetList().SetSelectedTarget(target); - - // Now create the gdb-remote process. 
- LLDB_LOG(log, "having target create process with gdb-remote plugin"); - process_sp = - target->CreateProcess(launch_info.GetListener(), "gdb-remote", nullptr); - - if (!process_sp) { - error.SetErrorString("CreateProcess() failed for gdb-remote process"); - LLDB_LOG(log, "error: {0}", error); - return process_sp; - } - - LLDB_LOG(log, "successfully created process"); - // Adjust launch for a hijacker. - ListenerSP listener_sp; - if (!launch_info.GetHijackListener()) { - LLDB_LOG(log, "setting up hijacker"); - listener_sp = - Listener::MakeListener("lldb.PlatformLinux.DebugProcess.hijack"); - launch_info.SetHijackListener(listener_sp); - process_sp->HijackProcessEvents(listener_sp); - } - - // Log file actions. - if (log) { - LLDB_LOG(log, "launching process with the following file actions:"); - StreamString stream; - size_t i = 0; - const FileAction *file_action; - while ((file_action = launch_info.GetFileActionAtIndex(i++)) != nullptr) { - file_action->Dump(stream); - LLDB_LOG(log, "{0}", stream.GetData()); - stream.Clear(); - } - } - - // Do the launch. - error = process_sp->Launch(launch_info); - if (error.Success()) { - // Handle the hijacking of process events. - if (listener_sp) { - const StateType state = process_sp->WaitForProcessToStop( - llvm::None, nullptr, false, listener_sp); - - LLDB_LOG(log, "pid {0} state {0}", process_sp->GetID(), state); - } - - // Hook up process PTY if we have one (which we should for local debugging - // with llgs). - int pty_fd = launch_info.GetPTY().ReleasePrimaryFileDescriptor(); - if (pty_fd != PseudoTerminal::invalid_fd) { - process_sp->SetSTDIOFileDescriptor(pty_fd); - LLDB_LOG(log, "hooked up STDIO pty to process"); - } else - LLDB_LOG(log, "not using process STDIO pty"); - } else { - LLDB_LOG(log, "{0}", error); - // FIXME figure out appropriate cleanup here. Do we delete the target? Do - // we delete the process? Does our caller do that? - } - - return process_sp; -} - void PlatformLinux::CalculateTrapHandlerSymbolNames() { m_trap_handlers.push_back(ConstString("_sigtramp")); m_trap_handlers.push_back(ConstString("__kernel_rt_sigreturn")); diff --git a/lldb/source/Plugins/Platform/Linux/PlatformLinux.h b/lldb/source/Plugins/Platform/Linux/PlatformLinux.h index 765a0a86a4ef3..fe0ad200d0857 100644 --- a/lldb/source/Plugins/Platform/Linux/PlatformLinux.h +++ b/lldb/source/Plugins/Platform/Linux/PlatformLinux.h @@ -48,10 +48,6 @@ class PlatformLinux : public PlatformPOSIX { bool CanDebugProcess() override; - lldb::ProcessSP DebugProcess(ProcessLaunchInfo &launch_info, - Debugger &debugger, Target *target, - Status &error) override; - void CalculateTrapHandlerSymbolNames() override; MmapArgList GetMmapArgumentList(const ArchSpec &arch, lldb::addr_t addr, diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index baa9d7b50ad63..4e2df72ee3a8e 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -1226,6 +1226,33 @@ PlatformDarwin::GetResumeCountForLaunchInfo(ProcessLaunchInfo &launch_info) { return 1; } +lldb::ProcessSP +PlatformDarwin::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger, + Target *target, // Can be NULL, if NULL create + // a new target, else use existing + // one + Status &error) { + ProcessSP process_sp; + + if (IsHost()) { + // We are going to hand this process off to debugserver which will be in + // charge of setting the exit status. 
However, we still need to reap it + // from lldb. So, make sure we use a exit callback which does not set exit + // status. + const bool monitor_signals = false; + launch_info.SetMonitorProcessCallback( + &ProcessLaunchInfo::NoOpMonitorCallback, monitor_signals); + process_sp = Platform::DebugProcess(launch_info, debugger, target, error); + } else { + if (m_remote_platform_sp) + process_sp = m_remote_platform_sp->DebugProcess(launch_info, debugger, + target, error); + else + error.SetErrorString("the platform is not currently connected"); + } + return process_sp; +} + void PlatformDarwin::CalculateTrapHandlerSymbolNames() { m_trap_handlers.push_back(ConstString("_sigtramp")); } diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h index 8e28a70003106..64360b439d106 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h @@ -11,6 +11,7 @@ #include "Plugins/Platform/POSIX/PlatformPOSIX.h" #include "lldb/Host/FileSystem.h" +#include "lldb/Host/ProcessLaunchInfo.h" #include "lldb/Utility/ConstString.h" #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/StructuredData.h" @@ -68,6 +69,11 @@ class PlatformDarwin : public PlatformPOSIX { int32_t GetResumeCountForLaunchInfo( lldb_private::ProcessLaunchInfo &launch_info) override; + lldb::ProcessSP DebugProcess(lldb_private::ProcessLaunchInfo &launch_info, + lldb_private::Debugger &debugger, + lldb_private::Target *target, + lldb_private::Status &error) override; + void CalculateTrapHandlerSymbolNames() override; llvm::VersionTuple diff --git a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp index caebd79c853e7..0e59e5aa2ec25 100644 --- a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp +++ b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp @@ -231,121 +231,6 @@ bool PlatformNetBSD::CanDebugProcess() { } } -// For local debugging, NetBSD will override the debug logic to use llgs-launch -// rather than lldb-launch, llgs-attach. This differs from current lldb- -// launch, debugserver-attach approach on MacOSX. -lldb::ProcessSP -PlatformNetBSD::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger, - Target *target, // Can be NULL, if NULL create a new - // target, else use existing one - Status &error) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM)); - LLDB_LOG(log, "target {0}", target); - - // If we're a remote host, use standard behavior from parent class. - if (!IsHost()) - return PlatformPOSIX::DebugProcess(launch_info, debugger, target, error); - - // - // For local debugging, we'll insist on having ProcessGDBRemote create the - // process. - // - - ProcessSP process_sp; - - // Make sure we stop at the entry point - launch_info.GetFlags().Set(eLaunchFlagDebug); - - // We always launch the process we are going to debug in a separate process - // group, since then we can handle ^C interrupts ourselves w/o having to - // worry about the target getting them as well. - launch_info.SetLaunchInSeparateProcessGroup(true); - - // Ensure we have a target. 
- if (target == nullptr) { - LLDB_LOG(log, "creating new target"); - TargetSP new_target_sp; - error = debugger.GetTargetList().CreateTarget( - debugger, "", "", eLoadDependentsNo, nullptr, new_target_sp); - if (error.Fail()) { - LLDB_LOG(log, "failed to create new target: {0}", error); - return process_sp; - } - - target = new_target_sp.get(); - if (!target) { - error.SetErrorString("CreateTarget() returned nullptr"); - LLDB_LOG(log, "error: {0}", error); - return process_sp; - } - } - - // Mark target as currently selected target. - debugger.GetTargetList().SetSelectedTarget(target); - - // Now create the gdb-remote process. - LLDB_LOG(log, "having target create process with gdb-remote plugin"); - process_sp = - target->CreateProcess(launch_info.GetListener(), "gdb-remote", nullptr); - - if (!process_sp) { - error.SetErrorString("CreateProcess() failed for gdb-remote process"); - LLDB_LOG(log, "error: {0}", error); - return process_sp; - } - - LLDB_LOG(log, "successfully created process"); - // Adjust launch for a hijacker. - ListenerSP listener_sp; - if (!launch_info.GetHijackListener()) { - LLDB_LOG(log, "setting up hijacker"); - listener_sp = - Listener::MakeListener("lldb.PlatformNetBSD.DebugProcess.hijack"); - launch_info.SetHijackListener(listener_sp); - process_sp->HijackProcessEvents(listener_sp); - } - - // Log file actions. - if (log) { - LLDB_LOG(log, "launching process with the following file actions:"); - StreamString stream; - size_t i = 0; - const FileAction *file_action; - while ((file_action = launch_info.GetFileActionAtIndex(i++)) != nullptr) { - file_action->Dump(stream); - LLDB_LOG(log, "{0}", stream.GetData()); - stream.Clear(); - } - } - - // Do the launch. - error = process_sp->Launch(launch_info); - if (error.Success()) { - // Handle the hijacking of process events. - if (listener_sp) { - const StateType state = process_sp->WaitForProcessToStop( - llvm::None, nullptr, false, listener_sp); - - LLDB_LOG(log, "pid {0} state {0}", process_sp->GetID(), state); - } - - // Hook up process PTY if we have one (which we should for local debugging - // with llgs). - int pty_fd = launch_info.GetPTY().ReleasePrimaryFileDescriptor(); - if (pty_fd != PseudoTerminal::invalid_fd) { - process_sp->SetSTDIOFileDescriptor(pty_fd); - LLDB_LOG(log, "hooked up STDIO pty to process"); - } else - LLDB_LOG(log, "not using process STDIO pty"); - } else { - LLDB_LOG(log, "{0}", error); - // FIXME figure out appropriate cleanup here. Do we delete the target? Do - // we delete the process? Does our caller do that? 
- } - - return process_sp; -} - void PlatformNetBSD::CalculateTrapHandlerSymbolNames() { m_trap_handlers.push_back(ConstString("_sigtramp")); } diff --git a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h index d53e584188846..36b228af8045a 100644 --- a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h +++ b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h @@ -48,10 +48,6 @@ class PlatformNetBSD : public PlatformPOSIX { bool CanDebugProcess() override; - lldb::ProcessSP DebugProcess(ProcessLaunchInfo &launch_info, - Debugger &debugger, Target *target, - Status &error) override; - void CalculateTrapHandlerSymbolNames() override; MmapArgList GetMmapArgumentList(const ArchSpec &arch, lldb::addr_t addr, diff --git a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp index 180ea1d2cfd14..0028fec51bba3 100644 --- a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp +++ b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp @@ -416,24 +416,115 @@ PlatformPOSIX::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger, Target *target, // Can be NULL, if NULL create a new // target, else use existing one Status &error) { + Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM)); + LLDB_LOG(log, "target {0}", target); + ProcessSP process_sp; - if (IsHost()) { - // We are going to hand this process off to debugserver which will be in - // charge of setting the exit status. However, we still need to reap it - // from lldb. So, make sure we use a exit callback which does not set exit - // status. - const bool monitor_signals = false; - launch_info.SetMonitorProcessCallback( - &ProcessLaunchInfo::NoOpMonitorCallback, monitor_signals); - process_sp = Platform::DebugProcess(launch_info, debugger, target, error); - } else { + if (!IsHost()) { if (m_remote_platform_sp) process_sp = m_remote_platform_sp->DebugProcess(launch_info, debugger, target, error); else error.SetErrorString("the platform is not currently connected"); + return process_sp; + } + + // + // For local debugging, we'll insist on having ProcessGDBRemote create the + // process. + // + + // Make sure we stop at the entry point + launch_info.GetFlags().Set(eLaunchFlagDebug); + + // We always launch the process we are going to debug in a separate process + // group, since then we can handle ^C interrupts ourselves w/o having to + // worry about the target getting them as well. + launch_info.SetLaunchInSeparateProcessGroup(true); + + // Ensure we have a target. + if (target == nullptr) { + LLDB_LOG(log, "creating new target"); + TargetSP new_target_sp; + error = debugger.GetTargetList().CreateTarget( + debugger, "", "", eLoadDependentsNo, nullptr, new_target_sp); + if (error.Fail()) { + LLDB_LOG(log, "failed to create new target: {0}", error); + return process_sp; + } + + target = new_target_sp.get(); + if (!target) { + error.SetErrorString("CreateTarget() returned nullptr"); + LLDB_LOG(log, "error: {0}", error); + return process_sp; + } + } + + // Mark target as currently selected target. + debugger.GetTargetList().SetSelectedTarget(target); + + // Now create the gdb-remote process. 
+ LLDB_LOG(log, "having target create process with gdb-remote plugin"); + process_sp = + target->CreateProcess(launch_info.GetListener(), "gdb-remote", nullptr); + + if (!process_sp) { + error.SetErrorString("CreateProcess() failed for gdb-remote process"); + LLDB_LOG(log, "error: {0}", error); + return process_sp; } + + LLDB_LOG(log, "successfully created process"); + // Adjust launch for a hijacker. + ListenerSP listener_sp; + if (!launch_info.GetHijackListener()) { + LLDB_LOG(log, "setting up hijacker"); + listener_sp = + Listener::MakeListener("lldb.PlatformLinux.DebugProcess.hijack"); + launch_info.SetHijackListener(listener_sp); + process_sp->HijackProcessEvents(listener_sp); + } + + // Log file actions. + if (log) { + LLDB_LOG(log, "launching process with the following file actions:"); + StreamString stream; + size_t i = 0; + const FileAction *file_action; + while ((file_action = launch_info.GetFileActionAtIndex(i++)) != nullptr) { + file_action->Dump(stream); + LLDB_LOG(log, "{0}", stream.GetData()); + stream.Clear(); + } + } + + // Do the launch. + error = process_sp->Launch(launch_info); + if (error.Success()) { + // Handle the hijacking of process events. + if (listener_sp) { + const StateType state = process_sp->WaitForProcessToStop( + llvm::None, nullptr, false, listener_sp); + + LLDB_LOG(log, "pid {0} state {0}", process_sp->GetID(), state); + } + + // Hook up process PTY if we have one (which we should for local debugging + // with llgs). + int pty_fd = launch_info.GetPTY().ReleasePrimaryFileDescriptor(); + if (pty_fd != PseudoTerminal::invalid_fd) { + process_sp->SetSTDIOFileDescriptor(pty_fd); + LLDB_LOG(log, "hooked up STDIO pty to process"); + } else + LLDB_LOG(log, "not using process STDIO pty"); + } else { + LLDB_LOG(log, "{0}", error); + // FIXME figure out appropriate cleanup here. Do we delete the target? Do + // we delete the process? Does our caller do that? + } + return process_sp; } From 5588dbce73be2c86bf1701b2ebbce47239130296 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Tue, 6 Oct 2020 13:20:15 +0200 Subject: [PATCH 133/321] [SystemZAsmParser] Treat VR128 separately in ParseDirectiveInsn(). This patch makes the parser - reject higher vector registers (>=16) in operands where they should not be accepted. - accept higher integers (>=16) in vector register operands. 
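For example (both cases are exercised by the test updates below), an
integer value >= 16 is now accepted where a vector register operand is
expected, while a high vector register is rejected in a GR operand:

  .insn vrr,0xe70000000056,16,17,0,0,0,0   # OK: encodes vlr %v16, %v17
  .insn rr,0x1800,%v16,%v0                 # error: invalid register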
Review: Ulrich Weigand Differential Revision: https://reviews.llvm.org/D88888 --- .../SystemZ/AsmParser/SystemZAsmParser.cpp | 22 +++++++++++++------ llvm/test/MC/SystemZ/directive-insn-vector.s | 2 ++ llvm/test/MC/SystemZ/regs-bad.s | 7 ++++++ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index be96612383c7d..8da4603817837 100644 --- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -635,18 +635,18 @@ static struct InsnMatchEntry InsnMatchTable[] = { { "ssf", SystemZ::InsnSSF, 4, { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } }, { "vri", SystemZ::InsnVRI, 6, - { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_U12Imm, MCK_U4Imm, MCK_U4Imm } }, + { MCK_U48Imm, MCK_VR128, MCK_VR128, MCK_U12Imm, MCK_U4Imm, MCK_U4Imm } }, { "vrr", SystemZ::InsnVRR, 7, - { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_AnyReg, MCK_U4Imm, MCK_U4Imm, + { MCK_U48Imm, MCK_VR128, MCK_VR128, MCK_VR128, MCK_U4Imm, MCK_U4Imm, MCK_U4Imm } }, { "vrs", SystemZ::InsnVRS, 5, - { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp12, MCK_U4Imm } }, + { MCK_U48Imm, MCK_AnyReg, MCK_VR128, MCK_BDAddr64Disp12, MCK_U4Imm } }, { "vrv", SystemZ::InsnVRV, 4, - { MCK_U48Imm, MCK_AnyReg, MCK_BDVAddr64Disp12, MCK_U4Imm } }, + { MCK_U48Imm, MCK_VR128, MCK_BDVAddr64Disp12, MCK_U4Imm } }, { "vrx", SystemZ::InsnVRX, 4, - { MCK_U48Imm, MCK_AnyReg, MCK_BDXAddr64Disp12, MCK_U4Imm } }, + { MCK_U48Imm, MCK_VR128, MCK_BDXAddr64Disp12, MCK_U4Imm } }, { "vsi", SystemZ::InsnVSI, 4, - { MCK_U48Imm, MCK_AnyReg, MCK_BDAddr64Disp12, MCK_U8Imm } } + { MCK_U48Imm, MCK_VR128, MCK_BDAddr64Disp12, MCK_U8Imm } } }; static void printMCExpr(const MCExpr *E, raw_ostream &OS) { @@ -851,10 +851,11 @@ SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) { // Parse any type of register (including integers) and add it to Operands. OperandMatchResultTy SystemZAsmParser::parseAnyRegister(OperandVector &Operands) { + SMLoc StartLoc = Parser.getTok().getLoc(); + // Handle integer values. if (Parser.getTok().is(AsmToken::Integer)) { const MCExpr *Register; - SMLoc StartLoc = Parser.getTok().getLoc(); if (Parser.parseExpression(Register)) return MatchOperand_ParseFail; @@ -876,6 +877,11 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) { if (parseRegister(Reg)) return MatchOperand_ParseFail; + if (Reg.Num > 15) { + Error(StartLoc, "invalid register"); + return MatchOperand_ParseFail; + } + // Map to the correct register kind. 
RegisterKind Kind; unsigned RegNo; @@ -1208,6 +1214,8 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) { OperandMatchResultTy ResTy; if (Kind == MCK_AnyReg) ResTy = parseAnyReg(Operands); + else if (Kind == MCK_VR128) + ResTy = parseVR128(Operands); else if (Kind == MCK_BDXAddr64Disp12 || Kind == MCK_BDXAddr64Disp20) ResTy = parseBDXAddr64(Operands); else if (Kind == MCK_BDAddr64Disp12 || Kind == MCK_BDAddr64Disp20) diff --git a/llvm/test/MC/SystemZ/directive-insn-vector.s b/llvm/test/MC/SystemZ/directive-insn-vector.s index 04c53a8bbf85b..9d4a7c6a6f876 100644 --- a/llvm/test/MC/SystemZ/directive-insn-vector.s +++ b/llvm/test/MC/SystemZ/directive-insn-vector.s @@ -25,3 +25,5 @@ #CHECK: e6 0c 20 0c 01 35 vlrl %v16, 12(%r2), 12 .insn vsi,0xe60000000035,%v16,12(%r2),12 +#CHECK: e7 01 00 00 0c 56 vlr %v16, %v17 + .insn vrr,0xe70000000056,16,17,0,0,0,0 diff --git a/llvm/test/MC/SystemZ/regs-bad.s b/llvm/test/MC/SystemZ/regs-bad.s index db56af96638ba..320cba0fc856c 100644 --- a/llvm/test/MC/SystemZ/regs-bad.s +++ b/llvm/test/MC/SystemZ/regs-bad.s @@ -217,6 +217,13 @@ lxr %f0,16 lxr %f0,0(%r1) +# Test that a high (>=16) vector register is not accepted in a non-vector +# operand. +# +#CHECK: error: invalid register +#CHECK: .insn rr,0x1800,%v16,%v0 +.insn rr,0x1800,%v16,%v0 + # Test access register operands # #CHECK: error: invalid operand for instruction From 8fa45e1fd527269140c4e2a1652fef5500da16fd Mon Sep 17 00:00:00 2001 From: Chuyang Chen Date: Tue, 6 Oct 2020 08:45:26 -0400 Subject: [PATCH 134/321] Convert diagnostics about multi-character literals from extension to warning This addresses PR46797. --- clang/include/clang/Basic/DiagnosticLexKinds.td | 6 +++--- clang/lib/Lex/LiteralSupport.cpp | 4 ++-- clang/test/Lexer/constants.c | 2 +- clang/test/Lexer/multi-char-constants.c | 4 ++++ 4 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 clang/test/Lexer/multi-char-constants.c diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 77d2e26ba7909..130e7687bad21 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -104,10 +104,10 @@ def warn_cxx98_compat_raw_string_literal : Warning< "raw string literals are incompatible with C++98">, InGroup, DefaultIgnore; -def ext_multichar_character_literal : ExtWarn< +def warn_multichar_character_literal : Warning< "multi-character character constant">, InGroup; -def ext_four_char_character_literal : Extension< - "multi-character character constant">, InGroup; +def warn_four_char_character_literal : Warning< + "multi-character character constant">, InGroup, DefaultIgnore; // Unicode and UCNs diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index eb16bc8c7da2d..6c3cdbdf64920 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -1373,9 +1373,9 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, if (isWide()) PP.Diag(Loc, diag::warn_extraneous_char_constant); else if (isAscii() && NumCharsSoFar == 4) - PP.Diag(Loc, diag::ext_four_char_character_literal); + PP.Diag(Loc, diag::warn_four_char_character_literal); else if (isAscii()) - PP.Diag(Loc, diag::ext_multichar_character_literal); + PP.Diag(Loc, diag::warn_multichar_character_literal); else PP.Diag(Loc, diag::err_multichar_utf_character_literal); IsMultiChar = true; diff --git a/clang/test/Lexer/constants.c b/clang/test/Lexer/constants.c index 9c84ddc019295..caa373720942a 
100644 --- a/clang/test/Lexer/constants.c +++ b/clang/test/Lexer/constants.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -pedantic -ftrigraphs %s +// RUN: %clang_cc1 -fsyntax-only -verify -Wfour-char-constants -pedantic -ftrigraphs %s int x = 000000080; // expected-error {{invalid digit}} diff --git a/clang/test/Lexer/multi-char-constants.c b/clang/test/Lexer/multi-char-constants.c new file mode 100644 index 0000000000000..3281c68d1fb22 --- /dev/null +++ b/clang/test/Lexer/multi-char-constants.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -Wfour-char-constants -pedantic-errors %s + +int x = 'ab'; // expected-warning {{multi-character character constant}} +int y = 'abcd'; // expected-warning {{multi-character character constant}} From d8ee28b96ee77a466aea5e9ca9c6ed57b2194b4d Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 6 Oct 2020 11:40:52 +0000 Subject: [PATCH 135/321] [mlir][Linalg] Extend buffer allocation to support Linalg init tensors This revision adds init_tensors support to buffer allocation for Linalg on tensors. Currently makes the assumption that the init_tensors fold onto the first output tensors. This assumption is not currently enforced or cast in stone and requires experimenting with tiling linalg on tensors for ops **without reductions**. Still this allows progress towards the end-to-end goal. --- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 4 +- .../Linalg/Transforms/TensorsToBuffers.cpp | 48 ++++-- .../buffer-placement-preparation.mlir | 138 ++++++++++++++++++ .../lib/Transforms/TestBufferPlacement.cpp | 49 +++++-- 4 files changed, 210 insertions(+), 29 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 082078dee3afc..895085cf79cfb 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -374,7 +374,6 @@ LogicalResult BlockArgsVerifier::verify(IndexedGenericOp op, template static LogicalResult verifyGenericOp(GenericOpType op) { - auto nInputViews = op.getNumInputs(); auto nLoops = op.getNumLoops(); if (op.inputs().size() + op.output_buffers().size() + @@ -410,8 +409,7 @@ static LogicalResult verifyGenericOp(GenericOpType op) { auto idx = en.index(); auto m = en.value().template cast().getValue(); indexingMaps.push_back(m); // Save reference to map for further checks. - auto view = (idx < nInputViews) ? op.getInputShapedType(idx) - : op.getOutputShapedType(idx - nInputViews); + auto view = op.getShapedType(idx); if (m.getNumSymbols() != expectedNumSymbols) return op.emitOpError("expected the number of symbols in indexing_map #") diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp index 7f671fc9f99e9..b714a1f6c6428 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp @@ -39,32 +39,50 @@ class GenericOpConverter linalg::GenericOpAdaptor adaptor(operands, op.getOperation()->getAttrDictionary()); - // TODO: support ops with reduction. - if (!op.init_tensors().empty()) - return failure(); - // All inputs need to be turned into buffers first. Until then, bail out. if (llvm::any_of(adaptor.inputs(), [](Value in) { return !in.getType().isa(); })) return failure(); + // All init_tensors need to be turned into buffers first. Until then, bail + // out. 
+ if (llvm::any_of(adaptor.init_tensors(), + [](Value in) { return !in.getType().isa(); })) + return failure(); + Location loc = op.getLoc(); - SmallVector outputBuffers, newOutputBuffers; - outputBuffers.assign(adaptor.output_buffers().begin(), - adaptor.output_buffers().end()); + SmallVector newOutputBuffers; newOutputBuffers.reserve(op.getNumOutputs()); newOutputBuffers.append(adaptor.output_buffers().begin(), adaptor.output_buffers().end()); // Update all types to memref types. - for (Type t : op.getResultTypes()) { - auto type = t.cast(); + // Assume the init tensors fold onto the first results. + // TODO: update this assumption because the reality is more complex under + // linalg on tensor based transformations. + for (auto en : llvm::enumerate(op.getResultTypes())) { + auto type = en.value().cast(); if (!type.hasStaticShape()) return rewriter.notifyMatchFailure( op, "dynamic shapes not currently supported"); auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); - auto alloc = rewriter.create(loc, memrefType); - newOutputBuffers.push_back(alloc); + bool foldedInitTensor = en.index() < op.getNumInitTensors(); + if (foldedInitTensor) { + // Dealing with an init tensor requires distinguishing between 1-use + // and many-use cases which would create aliasing and WAR hazards. + Value initTensor = op.getInitTensor(en.index()); + Value initBuffer = adaptor.init_tensors()[en.index()]; + if (initTensor.hasOneUse()) { + newOutputBuffers.push_back(initBuffer); + continue; + } + auto alloc = rewriter.create(loc, memrefType); + rewriter.create(loc, initBuffer, alloc); + newOutputBuffers.push_back(alloc); + } else { + auto alloc = rewriter.create(loc, memrefType); + newOutputBuffers.push_back(alloc); + } } // Generate a new linalg operation that works on buffers. @@ -82,8 +100,12 @@ class GenericOpConverter Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(), oldBlock.getArgumentTypes()); - // Add the result arguments to the new block. - for (Value v : newOutputBuffers) + // Add the result arguments that do not come from init_tensors to the new + // block. + // TODO: update this assumption because the reality is more complex under + // linalg on tensor based transformations. + for (Value v : + ValueRange(newOutputBuffers).drop_front(adaptor.init_tensors().size())) newBlock->addArgument(v.getType().cast().getElementType()); // Clone the body of the old block to the new block. 
diff --git a/mlir/test/Transforms/buffer-placement-preparation.mlir b/mlir/test/Transforms/buffer-placement-preparation.mlir index 4fcd225abc7ee..ac3ec12462118 100644 --- a/mlir/test/Transforms/buffer-placement-preparation.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation.mlir @@ -382,3 +382,141 @@ func @decompose_tuple_typed_function_args_and_results(%arg0: tuple, %arg // CHECK-NEXT: linalg.copy(%[[SECOND_TUPLE_SECOND_ELEM]], %[[RESULT0]]) // CHECK-NEXT: linalg.copy(%[[ARG2]], %[[RESULT1]]) // CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]] + +// ----- + +#accesses = [ + affine_map<(i, j, k) -> (j, i, k)>, + affine_map<(i, j, k) -> (i, j)> +] + +#trait = { + indexing_maps = #accesses, + iterator_types = ["parallel", "parallel", "reduction"] +} + +func @generic_with_init_tensor( + %arg0: tensor<2x3x4xvector<3x4xi4>>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) { + + %0 = linalg.generic #trait + ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>) + init(%arg1 : tensor<3x2xf32>) { + ^bb(%v0: vector<3x4xi4>, %v1: f32) : + %f0 = constant 0.0 : f32 + linalg.yield %f0 : f32 + } -> tensor<3x2xf32> + + return %0 : tensor<3x2xf32> +} +// CHECK-LABEL: func @generic_with_init_tensor +// CHECK-SAME: (%[[ARG0:.*]]: memref<2x3x4xvector<3x4xi4>>, %[[ARG1:.*]]: memref<3x2xf32>, %[[RESULT0:.*]]: memref<3x2xf32>) { +// CHECK-NEXT: linalg.generic +// CHECK: linalg.copy(%[[ARG1]], %[[RESULT0]]) +// CHECK-NEXT: return +// CHECK-NOT: % + +// ----- + +#accesses = [ + affine_map<(i, j, k) -> (j, i, k)>, + affine_map<(i, j, k) -> (i, j)> +] + +#trait = { + indexing_maps = #accesses, + iterator_types = ["parallel", "parallel", "reduction"] +} + +func @init_tensor_with_2_uses( + %arg0: tensor<2x3x4xvector<3x4xi4>>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>, tensor<3x2xf32>) { + + %0 = linalg.generic #trait + ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>) + init(%arg1 : tensor<3x2xf32>) { + ^bb(%v0: vector<3x4xi4>, %v1: f32) : + %f0 = constant 0.0 : f32 + linalg.yield %f0 : f32 + } -> tensor<3x2xf32> + + %1 = linalg.generic #trait + ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>) + init(%arg1 : tensor<3x2xf32>) { + ^bb(%v0: vector<3x4xi4>, %v1: f32) : + %f0 = constant 0.0 : f32 + linalg.yield %f0 : f32 + } -> tensor<3x2xf32> + + return %0, %1 : tensor<3x2xf32>, tensor<3x2xf32> +} +// CHECK-LABEL: func @init_tensor_with_2_uses +// CHECK-SAME: (%[[ARG0:.*]]: memref<2x3x4xvector<3x4xi4>>, %[[ARG1:.*]]: memref<3x2xf32>, %[[RESULT0:.*]]: memref<3x2xf32>, %[[RESULT1:.*]]: memref<3x2xf32>) { +// CHECK-NEXT: %[[ALLOC0:.*]] = alloc +// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[ALLOC0]]) +// CHECK-NEXT: linalg.generic +// CHECK-SAME: outs(%[[ALLOC0]] +// CHECK-NEXT: ^bb +// CHECK-NEXT: constant +// CHECK-NEXT: yield +// CHECK-NEXT: } +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc +// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[ALLOC1]]) +// CHECK-NEXT: linalg.generic +// CHECK-SAME: outs(%[[ALLOC1]] +// CHECK-NEXT: ^bb +// CHECK-NEXT: constant +// CHECK-NEXT: yield +// CHECK-NEXT: } +// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[RESULT0]]) +// CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[RESULT1]]) +// CHECK-NEXT: return +// CHECK-NOT: % + +// ----- + +#accesses = [ + affine_map<(i, j, k) -> (j, i, k)>, + affine_map<(i, j, k) -> (i, j)> +] + +#trait = { + indexing_maps = #accesses, + iterator_types = ["parallel", "parallel", "reduction"] +} + +func @init_tensor_with_1_use_def_chain( + %arg0: tensor<2x3x4xvector<3x4xi4>>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) { + + %0 = linalg.generic #trait + 
ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>) + init(%arg1 : tensor<3x2xf32>) { + ^bb(%v0: vector<3x4xi4>, %v1: f32) : + %f0 = constant 0.0 : f32 + linalg.yield %f0 : f32 + } -> tensor<3x2xf32> + + %1 = linalg.generic #trait + ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>) + init(%0 : tensor<3x2xf32>) { + ^bb(%v0: vector<3x4xi4>, %v1: f32) : + %f0 = constant 0.0 : f32 + linalg.yield %f0 : f32 + } -> tensor<3x2xf32> + + return %1 : tensor<3x2xf32> +} +// CHECK-LABEL: func @init_tensor_with_1_use_def_chain +// CHECK-SAME: (%[[ARG0:.*]]: memref<2x3x4xvector<3x4xi4>>, %[[ARG1:.*]]: memref<3x2xf32>, %[[RESULT0:.*]]: memref<3x2xf32>) { +// CHECK-NEXT: linalg.generic +// CHECK-NEXT: ^bb +// CHECK-NEXT: constant +// CHECK-NEXT: yield +// CHECK-NEXT: } +// CHECK-NEXT: linalg.generic +// CHECK-NEXT: ^bb +// CHECK-NEXT: constant +// CHECK-NEXT: yield +// CHECK-NEXT: } +// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[RESULT0]]) +// CHECK-NEXT: return +// CHECK-NOT: % + diff --git a/mlir/test/lib/Transforms/TestBufferPlacement.cpp b/mlir/test/lib/Transforms/TestBufferPlacement.cpp index dd6629e80a93a..3b31ac0d1a701 100644 --- a/mlir/test/lib/Transforms/TestBufferPlacement.cpp +++ b/mlir/test/lib/Transforms/TestBufferPlacement.cpp @@ -56,34 +56,53 @@ struct TestBufferPlacementPreparationPass linalg::GenericOpAdaptor adaptor(operands, op.getOperation()->getAttrDictionary()); - // TODO: support ops with reduction. - if (!op.init_tensors().empty()) - return failure(); - // All inputs need to be turned into buffers first. Until then, bail out. if (llvm::any_of(adaptor.inputs(), [](Value in) { return !in.getType().isa(); })) return failure(); + // All init_tensors need to be turned into buffers first. Until then, bail + // out. + if (llvm::any_of(adaptor.init_tensors(), [](Value in) { + return !in.getType().isa(); + })) + return failure(); + Location loc = op.getLoc(); - SmallVector outputBuffers, newOutputBuffers; - outputBuffers.assign(adaptor.output_buffers().begin(), - adaptor.output_buffers().end()); + SmallVector newOutputBuffers; newOutputBuffers.reserve(op.getNumOutputs()); newOutputBuffers.append(adaptor.output_buffers().begin(), adaptor.output_buffers().end()); // Update all types to memref types. - for (Type t : op.getResultTypes()) { - auto type = t.cast(); + // Assume the init tensors fold onto the first results. + // TODO: update this assumption because the reality is more complex under + // linalg on tensor based transformations. + for (auto en : llvm::enumerate(op.getResultTypes())) { + auto type = en.value().cast(); if (!type.hasStaticShape()) return rewriter.notifyMatchFailure( op, "dynamic shapes not currently supported"); auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); - auto alloc = rewriter.create(loc, memrefType); - newOutputBuffers.push_back(alloc); + bool foldedInitTensor = en.index() < op.getNumInitTensors(); + if (foldedInitTensor) { + // Dealing with an init tensor requires distinguishing between 1-use + // and many-use cases which would create aliasing and WAR hazards. + Value initTensor = op.getInitTensor(en.index()); + Value initBuffer = adaptor.init_tensors()[en.index()]; + if (initTensor.hasOneUse()) { + newOutputBuffers.push_back(initBuffer); + continue; + } + auto alloc = rewriter.create(loc, memrefType); + rewriter.create(loc, initBuffer, alloc); + newOutputBuffers.push_back(alloc); + } else { + auto alloc = rewriter.create(loc, memrefType); + newOutputBuffers.push_back(alloc); + } } // Generate a new linalg operation that works on buffers. 
@@ -101,8 +120,12 @@ struct TestBufferPlacementPreparationPass Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(), oldBlock.getArgumentTypes()); - // Add the result arguments to the new block. - for (Value v : newOutputBuffers) + // Add the result arguments that do not come from init_tensors to the new + // block. + // TODO: update this assumption because the reality is more complex under + // linalg on tensor based transformations. + for (Value v : ValueRange(newOutputBuffers) + .drop_front(adaptor.init_tensors().size())) newBlock->addArgument(v.getType().cast().getElementType()); // Clone the body of the old block to the new block. From e2452f57faa916866a99126d2337bd82a9e0a06d Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Tue, 6 Oct 2020 15:30:20 +0300 Subject: [PATCH 136/321] [AMDGPU][MC] Added detection of unsupported instructions Implemented identification of unsupported instructions; improved errors reporting. See bug 42590. Reviewers: rampitec Differential Revision: https://reviews.llvm.org/D88211 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 114 +- llvm/test/MC/AMDGPU/dpp-err.s | 12 +- llvm/test/MC/AMDGPU/ds.s | 4 +- llvm/test/MC/AMDGPU/flat-global.s | 108 +- .../MC/AMDGPU/flat-scratch-instructions.s | 66 +- llvm/test/MC/AMDGPU/flat.s | 14 +- llvm/test/MC/AMDGPU/fma-mix.s | 48 +- llvm/test/MC/AMDGPU/gfx1011_err.s | 22 +- llvm/test/MC/AMDGPU/gfx1030_err.s | 72 +- llvm/test/MC/AMDGPU/gfx10_asm_err.s | 7 +- llvm/test/MC/AMDGPU/gfx10_unsupported.s | 1102 ++++++ llvm/test/MC/AMDGPU/gfx7_unsupported.s | 3149 +++++++++++++++++ llvm/test/MC/AMDGPU/gfx8_unsupported.s | 1814 ++++++++++ llvm/test/MC/AMDGPU/gfx9_unsupported.s | 1043 ++++++ .../AMDGPU/invalid-instructions-spellcheck.s | 24 +- llvm/test/MC/AMDGPU/literals.s | 85 +- llvm/test/MC/AMDGPU/mad-mix.s | 48 +- llvm/test/MC/AMDGPU/mai-err.s | 184 +- llvm/test/MC/AMDGPU/mubuf-gfx9.s | 20 +- llvm/test/MC/AMDGPU/mubuf.s | 34 +- llvm/test/MC/AMDGPU/out-of-range-registers.s | 12 +- llvm/test/MC/AMDGPU/smem.s | 49 +- llvm/test/MC/AMDGPU/sop1.s | 4 +- llvm/test/MC/AMDGPU/sopc.s | 40 +- llvm/test/MC/AMDGPU/sopk.s | 2 +- llvm/test/MC/AMDGPU/sopp.s | 8 +- llvm/test/MC/AMDGPU/vop1-gfx9-err.s | 26 +- llvm/test/MC/AMDGPU/vop2.s | 68 +- llvm/test/MC/AMDGPU/vop3-errs.s | 26 +- llvm/test/MC/AMDGPU/vop3-gfx9.s | 233 +- llvm/test/MC/AMDGPU/vop3.s | 130 +- llvm/test/MC/AMDGPU/vop_dpp.s | 140 +- llvm/test/MC/AMDGPU/vop_sdwa.s | 312 +- llvm/test/MC/AMDGPU/wave32.s | 96 +- llvm/test/MC/AMDGPU/xdl-insts-err.s | 24 +- 35 files changed, 8193 insertions(+), 947 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx10_unsupported.s create mode 100644 llvm/test/MC/AMDGPU/gfx7_unsupported.s create mode 100644 llvm/test/MC/AMDGPU/gfx8_unsupported.s create mode 100644 llvm/test/MC/AMDGPU/gfx9_unsupported.s diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index e1369e8f5c95f..fae814a7871d4 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1246,6 +1246,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool isForcedDPP() const { return ForcedDPP; } bool isForcedSDWA() const { return ForcedSDWA; } ArrayRef getMatchedVariants() const; + StringRef getMatchedVariantName() const; std::unique_ptr parseRegister(bool RestoreOnFailure = false); bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, @@ -1369,6 +1370,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool 
isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; + bool isSupportedMnemo(StringRef Mnemo, + const FeatureBitset &FBS); + bool isSupportedMnemo(StringRef Mnemo, + const FeatureBitset &FBS, + ArrayRef Variants); + bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); + bool isId(const StringRef Id) const; bool isId(const AsmToken &Token, const StringRef Id) const; bool isToken(const AsmToken::TokenKind Kind) const; @@ -2837,6 +2845,15 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } +static ArrayRef getAllVariants() { + static const unsigned Variants[] = { + AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, + AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP + }; + + return makeArrayRef(Variants); +} + // What asm variants we should check ArrayRef AMDGPUAsmParser::getMatchedVariants() const { if (getForcedEncodingSize() == 32) { @@ -2860,12 +2877,23 @@ ArrayRef AMDGPUAsmParser::getMatchedVariants() const { return makeArrayRef(Variants); } - static const unsigned Variants[] = { - AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, - AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP - }; + return getAllVariants(); +} - return makeArrayRef(Variants); +StringRef AMDGPUAsmParser::getMatchedVariantName() const { + if (getForcedEncodingSize() == 32) + return "e32"; + + if (isForcedVOP3()) + return "e64"; + + if (isForcedSDWA()) + return "sdwa"; + + if (isForcedDPP()) + return "dpp"; + + return ""; } unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { @@ -3753,6 +3781,57 @@ static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID = 0); +static bool AMDGPUCheckMnemonic(StringRef Mnemonic, + const FeatureBitset &AvailableFeatures, + unsigned VariantID); + +bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, + const FeatureBitset &FBS) { + return isSupportedMnemo(Mnemo, FBS, getAllVariants()); +} + +bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, + const FeatureBitset &FBS, + ArrayRef Variants) { + for (auto Variant : Variants) { + if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) + return true; + } + + return false; +} + +bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, + const SMLoc &IDLoc) { + FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + + // Check if requested instruction variant is supported. + if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) + return false; + + // This instruction is not supported. + // Clear any other pending errors because they are no longer relevant. + getParser().clearPendingErrors(); + + // Requested instruction variant is not supported. + // Check if any other variants are supported. + StringRef VariantName = getMatchedVariantName(); + if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { + return Error(IDLoc, + Twine(VariantName, + " variant of this instruction is not supported")); + } + + // Finally check if this instruction is supported on any other GPU. + if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { + return Error(IDLoc, "instruction not supported on this GPU"); + } + + // Instruction not supported on any GPU. Probably a typo. 
+ std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); + return Error(IDLoc, "invalid instruction" + Suggestion); +} + bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -3782,27 +3861,26 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, break; } - switch (Result) { - default: break; - case Match_Success: + if (Result == Match_Success) { if (!validateInstruction(Inst, IDLoc, Operands)) { return true; } Inst.setLoc(IDLoc); Out.emitInstruction(Inst, getSTI()); return false; + } + + StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); + if (checkUnsupportedInstruction(Mnemo, IDLoc)) { + return true; + } + switch (Result) { + default: break; case Match_MissingFeature: + // FIXME: this case should be analyzed and error message corrected. return Error(IDLoc, "instruction not supported on this GPU"); - case Match_MnemonicFail: { - FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); - std::string Suggestion = AMDGPUMnemonicSpellCheck( - ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); - return Error(IDLoc, "invalid instruction" + Suggestion, - ((AMDGPUOperand &)*Operands[0]).getLocRange()); - } - case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; if (ErrorInfo != ~0ULL) { @@ -3819,6 +3897,8 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_PreferE32: return Error(IDLoc, "internal error: instruction without _e64 suffix " "should be encoded as e32"); + case Match_MnemonicFail: + llvm_unreachable("Invalid instructions should have been handled already"); } llvm_unreachable("Implement any new match types added!"); } @@ -4771,6 +4851,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, Parser.Lex(); if (Res != MatchOperand_Success) { + checkUnsupportedInstruction(Name, NameLoc); if (!Parser.hasPendingError()) { // FIXME: use real operand location rather than the current location. StringRef Msg = @@ -7469,6 +7550,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { #define GET_REGISTER_MATCHER #define GET_MATCHER_IMPLEMENTATION #define GET_MNEMONIC_SPELL_CHECKER +#define GET_MNEMONIC_CHECKER #include "AMDGPUGenAsmMatcher.inc" // This fuction should be defined after auto-generated include so that we have diff --git a/llvm/test/MC/AMDGPU/dpp-err.s b/llvm/test/MC/AMDGPU/dpp-err.s index 19d896d82d592..7323ace70c358 100644 --- a/llvm/test/MC/AMDGPU/dpp-err.s +++ b/llvm/test/MC/AMDGPU/dpp-err.s @@ -14,25 +14,25 @@ v_mov_b32_dpp v0, v1 row_xmask:1 row_mask:0x1 bank_mask:0x1 // GFX10: v_mov_b32_dpp v0, v1 row_xmask:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x61,0x01,0x11] v_mov_b32_dpp v0, v1 wave_shl:1 row_mask:0x1 bank_mask:0x1 -// GFX89: v0, v1 wave_shl:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x30,0x01,0x11] +// GFX89: v_mov_b32_dpp v0, v1 wave_shl:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x30,0x01,0x11] // GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 wave_shr:1 row_mask:0x1 bank_mask:0x1 -// GFX89: v0, v1 wave_shr:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x38,0x01,0x11] +// GFX89: v_mov_b32_dpp v0, v1 wave_shr:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x38,0x01,0x11] // GFX10-ERR: error: not a valid operand. 
v_mov_b32_dpp v0, v1 wave_rol:1 row_mask:0x1 bank_mask:0x1 -// GFX89: v0, v1 wave_rol:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x34,0x01,0x11] +// GFX89: v_mov_b32_dpp v0, v1 wave_rol:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x34,0x01,0x11] // GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 wave_ror:1 row_mask:0x1 bank_mask:0x1 -// GFX89: v0, v1 wave_ror:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x3c,0x01,0x11] +// GFX89: v_mov_b32_dpp v0, v1 wave_ror:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x3c,0x01,0x11] // GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 row_bcast:15 row_mask:0x1 bank_mask:0x1 -// GFX89: v0, v1 row_bcast:15 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x42,0x01,0x11] +// GFX89: v_mov_b32_dpp v0, v1 row_bcast:15 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x42,0x01,0x11] // GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 row_bcast:31 row_mask:0x1 bank_mask:0x1 -// GFX89: v0, v1 row_bcast:31 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x43,0x01,0x11] +// GFX89: v_mov_b32_dpp v0, v1 row_bcast:31 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x43,0x01,0x11] // GFX10-ERR: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/ds.s b/llvm/test/MC/AMDGPU/ds.s index 25c3cdd38830b..a618e9027f406 100644 --- a/llvm/test/MC/AMDGPU/ds.s +++ b/llvm/test/MC/AMDGPU/ds.s @@ -16,11 +16,11 @@ ds_add_u32 v2, v4 offset:16 // VI: ds_add_u32 v2, v4 offset:16 ; encoding: [0x10,0x00,0x00,0xd8,0x02,0x04,0x00,0x00] ds_add_src2_f32 v255 offset:65535 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI: ds_add_src2_f32 v255 offset:65535 ; encoding: [0xff,0xff,0x2a,0xd9,0xff,0x00,0x00,0x00] ds_add_src2_f32 v0 offset:4 gds -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // VI: ds_add_src2_f32 v0 offset:4 gds ; encoding: [0x04,0x00,0x2b,0xd9,0x00,0x00,0x00,0x00] //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/flat-global.s b/llvm/test/MC/AMDGPU/flat-global.s index 7a1d3333fb73d..e6c25f3f83f6a 100644 --- a/llvm/test/MC/AMDGPU/flat-global.s +++ b/llvm/test/MC/AMDGPU/flat-global.s @@ -13,7 +13,7 @@ global_load_ubyte v1, v[3:4], off global_load_ubyte v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x20,0xdc,0x03,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_load_sbyte v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x24,0xdc,0x03,0x00,0x7d,0x01] @@ -23,7 +23,7 @@ global_load_sbyte v1, v[3:4], off global_load_sbyte v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x24,0xdc,0x03,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_load_ushort v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x28,0xdc,0x03,0x00,0x7d,0x01] @@ -33,7 +33,7 @@ global_load_ushort v1, v[3:4], off global_load_ushort v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x28,0xdc,0x03,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_load_sshort v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x2c,0xdc,0x03,0x00,0x7d,0x01] @@ -43,7 +43,7 @@ global_load_sshort v1, v[3:4], off global_load_sshort v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x2c,0xdc,0x03,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_load_dword v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01] @@ -53,7 +53,7 @@ global_load_dword v1, v[3:4], off global_load_dword v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x30,0xdc,0x03,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_load_dwordx2 v[1:2], v[3:4], off // GFX10: encoding: [0x00,0x80,0x34,0xdc,0x03,0x00,0x7d,0x01] @@ -63,7 +63,7 @@ global_load_dwordx2 v[1:2], v[3:4], off global_load_dwordx2 v[1:2], v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x34,0xdc,0x03,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_load_dwordx3 v[1:3], v[3:4], off // GFX10: encoding: [0x00,0x80,0x3c,0xdc,0x03,0x00,0x7d,0x01] @@ -73,7 +73,7 @@ global_load_dwordx3 v[1:3], v[3:4], off global_load_dwordx3 v[1:3], v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x3c,0xdc,0x03,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_load_dwordx4 v[1:4], v[3:4], off // GFX10: encoding: [0x00,0x80,0x38,0xdc,0x03,0x00,0x7d,0x01] @@ -83,38 +83,38 @@ global_load_dwordx4 v[1:4], v[3:4], off global_load_dwordx4 v[1:4], v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x38,0xdc,0x03,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: 
error: instruction not supported on this GPU // FIXME: VI error should be instruction nto supported global_load_dword v1, v[3:4], off offset:0 // GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dword v1, v[3:4], off ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: :35: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU global_load_dword v1, v[3:4], off offset:4095 // GFX10-ERR: :35: error: expected a 12-bit signed offset // GFX9: global_load_dword v1, v[3:4], off offset:4095 ; encoding: [0xff,0x8f,0x50,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: :35: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU global_load_dword v1, v[3:4], off offset:-1 // GFX10: encoding: [0xff,0x8f,0x30,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dword v1, v[3:4], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: :35: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU global_load_dword v1, v[3:4], off offset:-4096 // GFX10-ERR: :35: error: expected a 12-bit signed offset // GFX9: global_load_dword v1, v[3:4], off offset:-4096 ; encoding: [0x00,0x90,0x50,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: :35: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU global_load_dword v1, v[3:4], off offset:4096 // GFX10-ERR: :35: error: expected a 12-bit signed offset // GFX9-ERR: :35: error: expected a 13-bit signed offset -// VI-ERR: :35: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU global_load_dword v1, v[3:4] off, offset:-4097 // GFX10-ERR: :35: error: expected a 12-bit signed offset // GFX9-ERR: :35: error: expected a 13-bit signed offset -// VI-ERR: :35: error: not a valid operand. 
+// VI-ERR: :1: error: instruction not supported on this GPU global_store_byte v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7d,0x00] @@ -124,7 +124,7 @@ global_store_byte v[3:4], v1, off global_store_byte v[3:4], v1, off dlc // GFX10: encoding: [0x00,0x90,0x60,0xdc,0x03,0x01,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_store_short v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7d,0x00] @@ -134,7 +134,7 @@ global_store_short v[3:4], v1, off global_store_short v[3:4], v1, off dlc // GFX10: encoding: [0x00,0x90,0x68,0xdc,0x03,0x01,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_store_dword v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00] @@ -144,7 +144,7 @@ global_store_dword v[3:4], v1, off global_store_dword v[3:4], v1, off dlc // GFX10: encoding: [0x00,0x90,0x70,0xdc,0x03,0x01,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_store_dwordx2 v[3:4], v[1:2], off // GFX10: encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7d,0x00] @@ -154,7 +154,7 @@ global_store_dwordx2 v[3:4], v[1:2], off global_store_dwordx2 v[3:4], v[1:2], off dlc // GFX10: encoding: [0x00,0x90,0x74,0xdc,0x03,0x01,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_store_dwordx3 v[3:4], v[1:3], off // GFX10: encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7d,0x00] @@ -164,7 +164,7 @@ global_store_dwordx3 v[3:4], v[1:3], off global_store_dwordx3 v[3:4], v[1:3], off dlc // GFX10: encoding: [0x00,0x90,0x7c,0xdc,0x03,0x01,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_store_dwordx4 v[3:4], v[1:4], off // GFX10: encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7d,0x00] @@ -174,12 +174,12 @@ global_store_dwordx4 v[3:4], v[1:4], off global_store_dwordx4 v[3:4], v[1:4], off dlc // GFX10: encoding: [0x00,0x90,0x78,0xdc,0x03,0x01,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU global_store_dword v[3:4], v1, off offset:12 // GFX10: encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dword v[3:4], v1, off offset:12 ; encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: :36: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_load_dword v1, v3, s[2:3] // GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x02,0x01] @@ -189,12 +189,12 @@ global_load_dword v1, v3, s[2:3] global_load_dword v1, v3, s[2:3] offset:24 // GFX10: encoding: [0x18,0x80,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v3, s[2:3] offset:24 ; encoding: [0x18,0x80,0x50,0xdc,0x03,0x00,0x02,0x01] -// VI-ERR: :34: error: not a valid operand. 
+// VI-ERR: :1: error: instruction not supported on this GPU global_load_dword v1, v3, s[2:3] offset:-8 // GFX10: encoding: [0xf8,0x8f,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v3, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x50,0xdc,0x03,0x00,0x02,0x01] -// VI-ERR: :34: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU global_store_dword v3, v1, s[2:3] // GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] @@ -204,12 +204,12 @@ global_store_dword v3, v1, s[2:3] global_store_dword v3, v1, s[2:3] offset:24 // GFX10: encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] // GFX9: global_store_dword v3, v1, s[2:3] offset:24 ; encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] -// VI-ERR: :35: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU global_store_dword v3, v1, s[2:3] offset:-8 // GFX10: encoding: [0xf8,0x8f,0x70,0xdc,0x03,0x01,0x02,0x00] // GFX9: global_store_dword v3, v1, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x70,0xdc,0x03,0x01,0x02,0x00] -// VI-ERR: :35: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU // XXX: Is this valid? global_store_dword v3, v1, exec @@ -220,12 +220,12 @@ global_store_dword v3, v1, exec global_load_dword v1, v[3:4], s2 // GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: :31: error: invalid operand for instruction -// VI-ERR: :31: error: invalid operand for instruction +// VI-ERR: :1: error: instruction not supported on this GPU global_load_dword v1, v[3:4], exec_hi // GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: :31: error: invalid operand for instruction -// VI-ERR: :31: error: invalid operand for instruction +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_cmpswap v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0xc4,0xdc,0x03,0x05,0x7d,0x00] @@ -360,132 +360,132 @@ global_atomic_dec_x2 v[3:4], v[5:6], off global_atomic_cmpswap v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xc4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_cmpswap v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x04,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :43: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_cmpswap_x2 v[3:4], v[5:8], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x44,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_cmpswap_x2 v[3:4], v[5:8], off offset:-16 ; encoding: [0xf0,0x9f,0x84,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :46: error: not a valid operand. 
+// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_swap v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xc0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_swap v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x00,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :36: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_swap_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x40,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_swap_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x80,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :43: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_add v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xc8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_add v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x08,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :35: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_sub v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xcc,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_sub v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x0c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :35: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_smin v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xd4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smin v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x10,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :36: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_umin v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xd8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umin v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x14,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :36: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_smax v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xdc,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smax v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x18,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :36: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_umax v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xe0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umax v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x1c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :36: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_and v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xe4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_and v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x20,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :35: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_or v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xe8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_or v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x24,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :34: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_xor v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xec,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_xor v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x28,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :35: error: not a valid operand +// 
VI-ERR: :1: error: instruction not supported on this GPU global_atomic_inc v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xf0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_inc v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x2c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :35: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_dec v[3:4], v5, off offset:-16 // GFX10: encoding: [0xf0,0x8f,0xf4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_dec v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x30,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :35: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_add_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x48,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_add_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x88,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :42: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_sub_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x4c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_sub_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x8c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :42: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_smin_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x54,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smin_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x90,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :43: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_umin_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x58,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umin_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x94,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :43: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_smax_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x5c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smax_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x98,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :43: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_umax_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x60,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umax_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x9c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :43: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_and_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x64,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_and_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa0,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :42: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_or_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x68,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_or_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa4,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :41: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_xor_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x6c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_xor_x2 v[3:4], v[5:6], off offset:-16 ; 
encoding: [0xf0,0x9f,0xa8,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :42: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_inc_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x70,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_inc_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xac,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :42: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_atomic_dec_x2 v[3:4], v[5:6], off offset:-16 // GFX10: encoding: [0xf0,0x8f,0x74,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_dec_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xb0,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: :42: error: not a valid operand +// VI-ERR: :1: error: instruction not supported on this GPU global_load_ubyte_d16 v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7d,0x01] @@ -530,4 +530,4 @@ global_store_short_d16_hi v[3:4], v1, off global_atomic_add v0, v[1:2], v2, off glc slc // GFX10: global_atomic_add v0, v[1:2], v2, off glc slc ; encoding: [0x00,0x80,0xcb,0xdc,0x01,0x02,0x7d,0x00] // GFX9: global_atomic_add v0, v[1:2], v2, off glc slc ; encoding: [0x00,0x80,0x0b,0xdd,0x01,0x02,0x7f,0x00] -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s index fb795105419ce..a967b883079a9 100644 --- a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s +++ b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s @@ -13,7 +13,7 @@ scratch_load_ubyte v1, v2, off scratch_load_ubyte v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x20,0xdc,0x02,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_load_sbyte v1, v2, off // GFX10: encoding: [0x00,0x40,0x24,0xdc,0x02,0x00,0x7d,0x01] @@ -23,7 +23,7 @@ scratch_load_sbyte v1, v2, off scratch_load_sbyte v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x24,0xdc,0x02,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_load_ushort v1, v2, off // GFX10: encoding: [0x00,0x40,0x28,0xdc,0x02,0x00,0x7d,0x01] @@ -33,7 +33,7 @@ scratch_load_ushort v1, v2, off scratch_load_ushort v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x28,0xdc,0x02,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_load_sshort v1, v2, off // GFX10: encoding: [0x00,0x40,0x2c,0xdc,0x02,0x00,0x7d,0x01] @@ -43,7 +43,7 @@ scratch_load_sshort v1, v2, off scratch_load_sshort v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x2c,0xdc,0x02,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, v2, off // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x02,0x00,0x7d,0x01] @@ -53,7 +53,7 @@ scratch_load_dword v1, v2, off scratch_load_dword v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x30,0xdc,0x02,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_load_dwordx2 v[1:2], v3, off // GFX10: encoding: 
[0x00,0x40,0x34,0xdc,0x03,0x00,0x7d,0x01] @@ -63,7 +63,7 @@ scratch_load_dwordx2 v[1:2], v3, off scratch_load_dwordx2 v[1:2], v3, off dlc // GFX10: encoding: [0x00,0x50,0x34,0xdc,0x03,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_load_dwordx3 v[1:3], v4, off // GFX10: encoding: [0x00,0x40,0x3c,0xdc,0x04,0x00,0x7d,0x01] @@ -73,7 +73,7 @@ scratch_load_dwordx3 v[1:3], v4, off scratch_load_dwordx3 v[1:3], v4, off dlc // GFX10: encoding: [0x00,0x50,0x3c,0xdc,0x04,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_load_dwordx4 v[1:4], v5, off // GFX10: encoding: [0x00,0x40,0x38,0xdc,0x05,0x00,0x7d,0x01] @@ -83,57 +83,57 @@ scratch_load_dwordx4 v[1:4], v5, off scratch_load_dwordx4 v[1:4], v5, off dlc // GFX10: encoding: [0x00,0x50,0x38,0xdc,0x05,0x00,0x7d,0x01] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, v2, off offset:0 // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_dword v1, v2, off ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, v2, off offset:4095 // GFX10-ERR: :32: error: expected a 12-bit signed offset // GFX9: scratch_load_dword v1, v2, off offset:4095 ; encoding: [0xff,0x4f,0x50,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: :32: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU scratch_load_dword v1, v2, off offset:-1 // GFX10: encoding: [0xff,0x4f,0x30,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_dword v1, v2, off offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, v2, off offset:-4096 // GFX10-ERR: :32: error: expected a 12-bit signed offset // GFX9: scratch_load_dword v1, v2, off offset:-4096 ; encoding: [0x00,0x50,0x50,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: :32: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU scratch_load_dword v1, v2, off offset:4096 // GFX10-ERR: :32: error: expected a 12-bit signed offset // GFX9-ERR: :32: error: expected a 13-bit signed offset -// VI-ERR: :32: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU scratch_load_dword v1, v2, off offset:-4097 // GFX10-ERR: :32: error: expected a 12-bit signed offset // GFX9-ERR: :32: error: expected a 13-bit signed offset -// VI-ERR: :32: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU scratch_load_dword v0, v1, off offset:-2049 glc slc // GFX10-ERR: :32: error: expected a 12-bit signed offset // GFX9: scratch_load_dword v0, v1, off offset:-2049 glc slc ; encoding: [0xff,0x57,0x53,0xdc,0x01,0x00,0x7f,0x00] -// VI-ERR: :32: error: not a valid operand. 
+// VI-ERR: :1: error: instruction not supported on this GPU scratch_load_dword v0, v1, off offset:-2048 glc slc // GFX10: scratch_load_dword v0, v1, off offset:-2048 glc slc ; encoding: [0x00,0x48,0x33,0xdc,0x01,0x00,0x7d,0x00] // GFX9: scratch_load_dword v0, v1, off offset:-2048 glc slc ; encoding: [0x00,0x58,0x53,0xdc,0x01,0x00,0x7f,0x00] -// VI-ERR: :32: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU scratch_load_dword v255, off, s1 offset:2047 // GFX10: scratch_load_dword v255, off, s1 offset:2047 ; encoding: [0xff,0x47,0x30,0xdc,0x00,0x00,0x01,0xff] // GFX9: scratch_load_dword v255, off, s1 offset:2047 ; encoding: [0xff,0x47,0x50,0xdc,0x00,0x00,0x01,0xff] -// VI-ERR: :34: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU scratch_load_dword v255, off, s0 offset:2048 // GFX10-ERR: :34: error: expected a 12-bit signed offset // GFX9: scratch_load_dword v255, off, s0 offset:2048 ; encoding: [0x00,0x48,0x50,0xdc,0x00,0x00,0x00,0xff] -// VI-ERR: :34: error: not a valid operand. +// VI-ERR: :1: error: instruction not supported on this GPU scratch_store_byte v1, v2, off // GFX10: encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7d,0x00] @@ -143,7 +143,7 @@ scratch_store_byte v1, v2, off scratch_store_byte v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x60,0xdc,0x01,0x02,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_store_short v1, v2, off // GFX10: encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7d,0x00] @@ -153,7 +153,7 @@ scratch_store_short v1, v2, off scratch_store_short v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x68,0xdc,0x01,0x02,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword v1, v2, off // GFX10: encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7d,0x00] @@ -163,7 +163,7 @@ scratch_store_dword v1, v2, off scratch_store_dword v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x70,0xdc,0x01,0x02,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_store_dwordx2 v1, v[2:3], off // GFX10: encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7d,0x00] @@ -173,7 +173,7 @@ scratch_store_dwordx2 v1, v[2:3], off scratch_store_dwordx2 v1, v[2:3], off dlc // GFX10: encoding: [0x00,0x50,0x74,0xdc,0x01,0x02,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_store_dwordx3 v1, v[2:4], off // GFX10: encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7d,0x00] @@ -183,7 +183,7 @@ scratch_store_dwordx3 v1, v[2:4], off scratch_store_dwordx3 v1, v[2:4], off dlc // GFX10: encoding: [0x00,0x50,0x7c,0xdc,0x01,0x02,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_store_dwordx4 v1, v[2:5], off // GFX10: encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7d,0x00] @@ -193,12 +193,12 @@ scratch_store_dwordx4 v1, v[2:5], off scratch_store_dwordx4 v1, v[2:5], off dlc // GFX10: encoding: [0x00,0x50,0x78,0xdc,0x01,0x02,0x7d,0x00] // GFX9-ERR: error: failed parsing operand -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this 
GPU scratch_store_dword v1, v2, off offset:12 // GFX10: encoding: [0x0c,0x40,0x70,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dword v1, v2, off offset:12 ; encoding: [0x0c,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: error: not a valid operand +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, off, s1 // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x01,0x01] @@ -208,7 +208,7 @@ scratch_load_dword v1, off, s1 scratch_load_dword v1, off, s1 offset:32 // GFX10: encoding: [0x20,0x40,0x30,0xdc,0x00,0x00,0x01,0x01] // GFX9: scratch_load_dword v1, off, s1 offset:32 ; encoding: [0x20,0x40,0x50,0xdc,0x00,0x00,0x01,0x01] -// VI-ERR: error: not a valid operand +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword off, v2, s1 // GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] @@ -218,38 +218,38 @@ scratch_store_dword off, v2, s1 scratch_store_dword off, v2, s1 offset:12 // GFX10: encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_dword off, v2, s1 offset:12 ; encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] -// VI-ERR: error: not a valid operand +// VI-ERR: error: instruction not supported on this GPU // FIXME: Should error about multiple offsets scratch_load_dword v1, v2, s1 // GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, v2, s1 offset:32 // GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction -// VI-ERR: error: not a valid operand +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword v1, v2, s1 // GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword v1, v2, s1 offset:32 // GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction -// VI-ERR: error: not a valid operand +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, off, exec_hi // GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword off, v2, exec_hi // GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction -// VI-ERR: error: invalid operand for instruction +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, off, exec_lo // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7e,0x01] diff --git a/llvm/test/MC/AMDGPU/flat.s b/llvm/test/MC/AMDGPU/flat.s index bfb71c9ebf4d2..31dd4f0500f1e 100644 --- a/llvm/test/MC/AMDGPU/flat.s +++ b/llvm/test/MC/AMDGPU/flat.s @@ -21,12 +21,12 @@ flat_load_dword v1, v[3:4] // VI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01] flat_load_dword v1, v[3:4] glc -// NOSI: error: invalid operand for instruction +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x31,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x51,0xdc,0x03,0x00,0x00,0x01] flat_load_dword v1, v[3:4] glc slc -// NOSI: error: invalid operand for instruction +// NOSI: error: 
instruction not supported on this GPU // CI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x00,0x01] @@ -35,16 +35,16 @@ flat_store_dword v[3:4], v1 // CIVI: flat_store_dword v[3:4], v1 ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 glc -// NOSI: error: invalid operand for instruction +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_dword v[3:4], v1 glc ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 glc slc -// NOSI: error: invalid operand for instruction +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_dword v[3:4], v1 glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 slc -// NOSI: error: invalid operand for instruction +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_dword v[3:4], v1 slc ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x00,0x00] // FIXME: For atomic instructions, glc must be placed immediately following @@ -53,12 +53,12 @@ flat_store_dword v[3:4], v1 slc // flat_atomic_add v1, v[3:4], v5 slc glc flat_atomic_add v1, v[3:4], v5 offset:0 glc slc -// NOSI: error: not a valid operand. +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x00,0x01] flat_atomic_add v[3:4], v5 slc -// NOSI: error: invalid operand for instruction +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0xca,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0x0a,0xdd,0x03,0x05,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/fma-mix.s b/llvm/test/MC/AMDGPU/fma-mix.s index 3f510090ee58e..6bd293e467f94 100644 --- a/llvm/test/MC/AMDGPU/fma-mix.s +++ b/llvm/test/MC/AMDGPU/fma-mix.s @@ -20,57 +20,57 @@ v_fma_mixhi_f16 v0, v1, v2, v3 v_fma_mix_f32 v0, abs(v1), v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU // FIXME: Improve error messages v_fma_mix_f32 v0, v1, abs(v2), v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, abs(v3) // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, -v1, v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, -v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44] -// GFX9-MADMIX-ERR: error: not a valid operand. 
+// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, -v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, -abs(v1), v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, -abs(v2), v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, -abs(v3) // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3) // GFX9-FMAMIX: v_fma_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mixhi_f16 v0, -v1, abs(v2), -abs(v3) // GFX9-FMAMIX: v_fma_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mixlo_f16 v0, v1, v2, v3 clamp // GFX9-FMAMIX: v_fma_mixlo_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa1,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: invalid operand for instruction +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mixhi_f16 v0, v1, v2, v3 clamp // GFX9-FMAMIX: v_fma_mixhi_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa2,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: invalid operand for instruction +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU // // op_sel with non-packed instructions @@ -78,25 +78,25 @@ v_fma_mixhi_f16 v0, v1, v2, v3 clamp v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU // FIXME: Improve error messages v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x10,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x00,0x38,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: not a valid operand. 
+// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] @@ -104,24 +104,24 @@ v_fma_mix_f32 v0, v1, v2, v3 v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x0c] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x14] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x1c] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-FMAMIX: v_fma_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa1,0xd3,0x01,0x05,0x0e,0x0c] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-FMAMIX: v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa2,0xd3,0x01,0x05,0x0e,0x0c] -// GFX9-MADMIX-ERR: error: not a valid operand. +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1011_err.s b/llvm/test/MC/AMDGPU/gfx1011_err.s index 4b5bc2e5887af..d6c268eafbcd0 100644 --- a/llvm/test/MC/AMDGPU/gfx1011_err.s +++ b/llvm/test/MC/AMDGPU/gfx1011_err.s @@ -5,16 +5,16 @@ v_dot8c_i32_i4 v5, v1, v2 // GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX10: error: not a valid operand. 
+// GFX10: error: instruction not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_SHADER_CYCLES) // GFX10: error: specified hardware register is not supported on this GPU @@ -26,25 +26,25 @@ image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] // GFX10: error: instruction not supported on this GPU image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 -// GFX10: error: invalid operand +// GFX10: error: instruction not supported on this GPU image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] // GFX10: error: instruction not supported on this GPU image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 -// GFX10: error: invalid operand +// GFX10: error: instruction not supported on this GPU image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D glc -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU image_msaa_load v5, v[1:2], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D d16 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s index b8e1afdfdb5b0..b8a1cb3efec33 100644 --- a/llvm/test/MC/AMDGPU/gfx1030_err.s +++ b/llvm/test/MC/AMDGPU/gfx1030_err.s @@ -5,16 +5,16 @@ v_dot8c_i32_i4 v5, v1, v2 // GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU s_get_waveid_in_workgroup s0 // GFX10: error: instruction not supported on this GPU @@ -44,97 +44,97 @@ v_mac_legacy_f32 v0, v1, v2 // GFX10: error: instruction not supported on this GPU ds_add_src2_u32 v1 offset:65535 gds -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_add_src2_u32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_add_src2_f32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_sub_src2_u32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_rsub_src2_u32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_inc_src2_u32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_dec_src2_u32 v1 offset:65535 -// GFX10: error: not a valid operand. 
+// GFX10: error: instruction not supported on this GPU ds_min_src2_i32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_max_src2_i32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_min_src2_u32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_max_src2_u32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_and_src2_b32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_or_src2_b32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_xor_src2_b32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_min_src2_f32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_max_src2_f32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_add_src2_u64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_sub_src2_u64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_rsub_src2_u64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_inc_src2_u64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_dec_src2_u64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_min_src2_i64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_max_src2_i64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_min_src2_u64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_max_src2_u64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_and_src2_b64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_or_src2_b64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_xor_src2_b64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_min_src2_f64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_max_src2_f64 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_write_src2_b32 v1 offset:65535 -// GFX10: error: not a valid operand. +// GFX10: error: instruction not supported on this GPU ds_write_src2_b64 v1 offset:65535 -// GFX10: error: not a valid operand. 
+// GFX10: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_err.s index 978ec345f2b05..ed33a55fb953e 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_err.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_err.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx601 %s 2>&1 | FileCheck --check-prefixes=GFX6-7,GFX6-8,GFX6-9 --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx701 %s 2>&1 | FileCheck --check-prefixes=GFX6-7,GFX6-8,GFX6-9 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx801 %s 2>&1 | FileCheck --check-prefixes=GFX6-8,GFX6-9 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefixes=GFX6-9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx801 %s 2>&1 | FileCheck --check-prefixes=GFX6-8,GFX6-9,GFX8-9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefixes=GFX6-9,GFX8-9 --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10 --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10 --implicit-check-not=error: %s @@ -271,4 +271,5 @@ s_endpgm_saved //===----------------------------------------------------------------------===// v_mov_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] -// GFX6-9: error: not a valid operand +// GFX6-7: error: dpp variant of this instruction is not supported +// GFX8-9: error: not a valid operand diff --git a/llvm/test/MC/AMDGPU/gfx10_unsupported.s b/llvm/test/MC/AMDGPU/gfx10_unsupported.s new file mode 100644 index 0000000000000..34a9d4686e204 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx10_unsupported.s @@ -0,0 +1,1102 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s + +//===----------------------------------------------------------------------===// +// Unsupported instructions. 
+//===----------------------------------------------------------------------===// + +buffer_atomic_add_f32 v255, off, s[8:11], s3 offset:4095 +// CHECK: error: instruction not supported on this GPU + +buffer_atomic_pk_add_f16 v255, off, s[8:11], s3 offset:4095 +// CHECK: error: instruction not supported on this GPU + +buffer_store_lds_dword s[4:7], s0 lds +// CHECK: error: instruction not supported on this GPU + +buffer_wbinvl1_vol +// CHECK: error: instruction not supported on this GPU + +global_atomic_add_f32 v[1:2], v2, off +// CHECK: error: instruction not supported on this GPU + +global_atomic_pk_add_f16 v[1:2], v2, off +// CHECK: error: instruction not supported on this GPU + +s_cbranch_g_fork -1, s[4:5] +// CHECK: error: instruction not supported on this GPU + +s_cbranch_i_fork exec, 12609 +// CHECK: error: instruction not supported on this GPU + +s_cbranch_join 1 +// CHECK: error: instruction not supported on this GPU + +s_dcache_inv_vol +// CHECK: error: instruction not supported on this GPU + +s_dcache_wb_vol +// CHECK: error: instruction not supported on this GPU + +s_rfe_restore_b64 -1, s2 +// CHECK: error: instruction not supported on this GPU + +s_set_gpr_idx_idx -1 +// CHECK: error: instruction not supported on this GPU + +s_set_gpr_idx_mode 0 +// CHECK: error: instruction not supported on this GPU + +s_set_gpr_idx_off +// CHECK: error: instruction not supported on this GPU + +s_set_gpr_idx_on -1, 0x0 +// CHECK: error: instruction not supported on this GPU + +s_setvskip -1, s2 +// CHECK: error: instruction not supported on this GPU + +v_accvgpr_read_b32 a0, a0 +// CHECK: error: instruction not supported on this GPU + +v_accvgpr_write_b32 a0, 65 +// CHECK: error: instruction not supported on this GPU + +v_add_i16 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_add_i32 lds_direct, v0, v0 +// CHECK: error: instruction not supported on this GPU + +v_add_i32_e32 v0, vcc, 0.5, v0 +// CHECK: error: instruction not supported on this GPU + +v_add_i32_e64 v1, s[0:1], v2, v3 +// CHECK: error: instruction not supported on this GPU + +v_add_u16 v0, (i1+100)*2, v0 +// CHECK: error: instruction not supported on this GPU + +v_add_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_add_u16_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_add_u16_sdwa v0, scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_add_u32 v0, execz, v0 +// CHECK: error: instruction not supported on this GPU + +v_add_u32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_add_u32_e32 v1, s1, v3 +// CHECK: error: instruction not supported on this GPU + +v_add_u32_e64 v0, scc, v0 +// CHECK: error: instruction not supported on this GPU + +v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +// CHECK: error: instruction not supported on this GPU + +v_addc_co_u32 v0, vcc, shared_base, v0, vcc +// CHECK: error: instruction not supported on this GPU + +v_addc_co_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_addc_co_u32_e32 v3, vcc, 12345, v3, vcc +// CHECK: error: instruction not supported on this GPU + +v_addc_co_u32_e64 v255, s[12:13], v1, v2, s[6:7] +// CHECK: error: instruction not supported on this 
GPU + +v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +// CHECK: error: instruction not supported on this GPU + +v_addc_u32 v0, vcc, exec_hi, v0, vcc +// CHECK: error: instruction not supported on this GPU + +v_addc_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_addc_u32_e32 v1, -1, v2, v3, s0 +// CHECK: error: instruction not supported on this GPU + +v_addc_u32_e64 v0, s[0:1], s0, s0, s[0:1] +// CHECK: error: instruction not supported on this GPU + +v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +// CHECK: error: instruction not supported on this GPU + +v_ashr_i32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_ashr_i32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_ashr_i64 v[254:255], v[1:2], v2 +// CHECK: error: instruction not supported on this GPU + +v_cmp_f_i16 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmp_f_i16_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmp_f_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_cmp_f_u16 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmp_f_u16_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmp_f_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_cmp_t_i16 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmp_t_i16_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmp_t_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_cmp_t_u16 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmp_t_u16_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmp_t_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_cmps_eq_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_eq_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_eq_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_eq_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_f_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_f_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_f_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_f_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_ge_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_ge_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_ge_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_ge_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_gt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_gt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not 
supported on this GPU + +v_cmps_gt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_gt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_le_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_le_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_le_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_le_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_lg_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_lg_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_lg_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_lg_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_lt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_lt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_lt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_lt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_neq_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_neq_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_neq_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_neq_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nge_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nge_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nge_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nge_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_ngt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_ngt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_ngt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_ngt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nle_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nle_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nle_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nle_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlg_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlg_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlg_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlg_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + 
+v_cmps_nlt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_o_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_o_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_o_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_o_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_tru_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_tru_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_tru_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_tru_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_u_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_u_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_u_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_u_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_eq_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_eq_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_eq_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_eq_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_f_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_f_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_f_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_f_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ge_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ge_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ge_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ge_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_gt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_gt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_gt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_gt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_le_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_le_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_le_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_le_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lg_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lg_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lg_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lg_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lt_f32 vcc, -1, v2 
+// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_neq_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_neq_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_neq_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_neq_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nge_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nge_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nge_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nge_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ngt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ngt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ngt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ngt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nle_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nle_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nle_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nle_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlg_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlg_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlg_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlg_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_o_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_o_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_o_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_o_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_tru_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_tru_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_tru_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_tru_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_u_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_u_f32_e64 flat_scratch, 
v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_u_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_u_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_i16_e64 exec, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_u16_e64 exec, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_i16_e64 exec, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_u16_e64 exec, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_pkaccum_u8_f32 v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_pkaccum_u8_f32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_div_fixup_legacy_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_div_fixup_legacy_f16_e64 v5, 0.5, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v0, -v1, -v2, -v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2_i32_i16 v0, -v1, -v2, -v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2_u32_u16 v0, -v1, -v2, -v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_f32_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_f32_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_f32_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_i32_i16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_i32_i16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4_i32_i8 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4_u32_u8 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4c_i32_i8 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4c_i32_i8_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4c_i32_i8_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8_i32_i4 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8_u32_u4 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8c_i32_i4 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8c_i32_i4_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_exp_legacy_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_exp_legacy_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_exp_legacy_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_exp_legacy_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_legacy_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_legacy_f16_e64 v5, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_interp_p2_legacy_f16 v255, v2, attr0.x, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_log_clamp_f32 v1, 0.5
+// CHECK: error: instruction not supported on this GPU
+
+v_log_clamp_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_log_legacy_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_log_legacy_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_log_legacy_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_log_legacy_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_lshl_b32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshl_b32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshl_b64 v[254:255], v[1:2], v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshr_b32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshr_b32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshr_b64 v[254:255], v[1:2], v2
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_f16_e64 v0, -4.0, flat_scratch_lo
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_f16_e64 v5, 0.5, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_f16_e64 v5, 0.5, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_i16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_i16_e64 v5, 0, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_u16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_u16_e64 v5, 0, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_mix_f32 v0, -abs(v1), v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3)
+// CHECK: error: instruction not supported on this GPU
+
+v_madak_f16 v0, src_lds_direct, v0, 0x1121
+// CHECK: error: instruction not supported on this GPU
+
+v_madmk_f16 v0, src_lds_direct, 0x1121, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_max_legacy_f32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_max_legacy_f32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x1f32 a[0:31], 1, v1, a[1:32]
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_min_legacy_f32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_min_legacy_f32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_clamp_f32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_clamp_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_clamp_f64 v[254:255], v[1:2]
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_clamp_f64_e64 v[254:255], v[1:2]
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_legacy_f32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_legacy_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_clamp_f32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_clamp_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_clamp_f64 v[254:255], v[1:2]
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_clamp_f64_e64 v[254:255], v[1:2]
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_legacy_f32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_legacy_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_screen_partition_4se_b32 v5, -1
+// CHECK: error: instruction not supported on this GPU
+
+v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0
+// CHECK: error: instruction not supported on this GPU
+
+v_screen_partition_4se_b32_e64 v5, -1
+// CHECK: error: instruction not supported on this GPU
+
+v_screen_partition_4se_b32_sdwa v5, v1 src0_sel:BYTE_0
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_i16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_i32 v1, s[0:1], v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_i32_e64 v255, s[12:13], v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u32 v1, 4.0, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u32_e32 v1, s1, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u32_e64 v255, s[12:13], v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_co_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_co_u32_e64 v255, s[12:13], v1, v2, s[6:7]
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_u32 v1, s[0:1], v2, v3, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_u32_e64 v255, s[12:13], v1, v2, s[6:7]
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_co_u32 v0, vcc, src_lds_direct, v0, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_co_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_co_u32_e64 v255, s[12:13], v1, v2, s[6:7]
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_u32 v1, s[0:1], v2, v3, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_u32_e64 v255, s[12:13], v1, v2, s[6:7]
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_i32 v1, s[0:1], v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_i32_e64 v255, s[12:13], v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u16 v0, src_lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u32 v0, src_lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u32_e32 v1, s1, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u32_e64 v255, s[12:13], v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+//===----------------------------------------------------------------------===//
+// Unsupported e32 variants.
+//===----------------------------------------------------------------------===//
+
+v_add_co_u32_e32 v2, vcc, s0, v2
+// CHECK: error: e32 variant of this instruction is not supported
+
+v_sub_co_u32_e32 v2, vcc, s0, v2
+// CHECK: error: e32 variant of this instruction is not supported
+
+v_subrev_co_u32_e32 v2, vcc, s0, v2
+// CHECK: error: e32 variant of this instruction is not supported
+
+//===----------------------------------------------------------------------===//
+// Unsupported e64 variants.
+//===----------------------------------------------------------------------===//
+
+v_swap_b32_e64 v1, v2
+// CHECK: error: e64 variant of this instruction is not supported
+
+//===----------------------------------------------------------------------===//
+// Unsupported dpp variants.
+//===----------------------------------------------------------------------===//
+
+v_add_co_u32_dpp v255, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_ashrrev_i16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_lshlrev_b16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_lshrrev_b16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_max_i16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_max_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_min_i16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_min_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_mul_lo_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_sub_co_u32_dpp v255, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_subrev_co_u32_dpp v255, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+//===----------------------------------------------------------------------===//
+// Unsupported sdwa variants.
+//===----------------------------------------------------------------------===//
+
+v_add_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_ashrrev_i16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_lshlrev_b16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_lshrrev_b16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_mac_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_max_i16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_max_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_min_i16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_min_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_mul_lo_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_sub_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_subrev_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+// CHECK: error: sdwa variant of this instruction is not supported
diff --git a/llvm/test/MC/AMDGPU/gfx7_unsupported.s b/llvm/test/MC/AMDGPU/gfx7_unsupported.s
new file mode 100644
index 0000000000000..81146340ad729
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx7_unsupported.s
@@ -0,0 +1,3149 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --implicit-check-not=error: %s
+
+//===----------------------------------------------------------------------===//
+// Unsupported instructions.
+//===----------------------------------------------------------------------===//
+
+buffer_atomic_add_f32 v255, off, s[8:11], s3 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_atomic_pk_add_f16 v255, off, s[8:11], s3 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_gl0_inv
+// CHECK: error: instruction not supported on this GPU
+
+buffer_gl1_inv
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_format_d16_hi_x v5, off, s[8:11], s3
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_format_d16_x v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_format_d16_xy v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_format_d16_xyz v[1:2], off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_format_d16_xyzw v[1:2], off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_sbyte_d16 v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_sbyte_d16_hi v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_short_d16 v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_short_d16_hi v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_ubyte_d16 v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_ubyte_d16_hi v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_byte_d16_hi v1, off, s[12:15], -1 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 glc
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_format_d16_x v1, off, s[12:15], -1 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_format_d16_xy v1, off, s[12:15], -1 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_format_d16_xyz v[1:2], off, s[12:15], -1 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_format_d16_xyzw v[1:2], off, s[12:15], -1 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_lds_dword s[4:7], s0 lds
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_short_d16_hi v1, off, s[12:15], -1 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+ds_add_f32 v0, v1
+// CHECK: error: instruction not supported on this GPU
+
+ds_add_rtn_f32 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+ds_add_src2_f32 v0 offset:4 gds
+// CHECK: error: instruction not supported on this GPU
+
+ds_bpermute_b32 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+ds_permute_b32 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_addtid_b32 v255 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_i8_d16 v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_i8_d16_hi v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_u16_d16 v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_u16_d16_hi v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_u8_d16 v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_u8_d16_hi v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_write_addtid_b32 v255 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_write_b16_d16_hi v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+ds_write_b8_d16_hi v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_sbyte_d16 v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_sbyte_d16_hi v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_short_d16 v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_short_d16_hi v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_ubyte_d16 v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_ubyte_d16_hi v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_store_byte_d16_hi v[1:2], v2
+// CHECK: error: instruction not supported on this GPU
+
+flat_store_short_d16_hi v[1:2], v2
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_add v0, v[1:2], v2, off glc slc
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_add_f32 v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_add_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_and v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_and_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_cmpswap v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_cmpswap_x2 v[1:2], v[252:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_dec v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_dec_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_inc v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_inc_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_or v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_or_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_pk_add_f16 v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_smax v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_smax_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_smin v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_smin_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_sub v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_sub_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_swap v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_swap_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_umax v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_umax_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_umin v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_umin_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_xor v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_xor_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_load_dword v1, v3, s[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+global_load_dwordx2 v[1:2], v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_dwordx3 v[1:3], v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_dwordx4 v[1:4], v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_sbyte v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_sbyte_d16 v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_sbyte_d16_hi v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_short_d16 v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_short_d16_hi v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_sshort v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_ubyte v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_ubyte_d16 v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_ubyte_d16_hi v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_ushort v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_store_byte v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_store_byte_d16_hi v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_store_dword v254, v1, s[2:3] offset:16
+// CHECK: error: instruction not supported on this GPU
+
+global_store_dwordx2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_store_dwordx3 v[1:2], v[253:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_store_dwordx4 v[1:2], v[252:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_store_short v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_store_short_d16_hi v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+s_and_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_andn1_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_andn1_saveexec_b64 exec, s[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+s_andn1_wrexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_andn1_wrexec_b64 exec, s[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+s_andn2_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_andn2_wrexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_andn2_wrexec_b64 exec, s[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+s_atc_probe 0x0, s[4:5], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atc_probe_buffer 0x0, s[8:11], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_add s5, s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_add_x2 s[10:11], s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_and s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_and_x2 s[10:11], s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_cmpswap s[10:11], s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_dec s5, s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_dec_x2 s[10:11], s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_inc s5, s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_inc_x2 s[10:11], s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_or s5, s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_or_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_smax s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_smax_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_smin s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_smin_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_sub s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_sub_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_swap s5, s[2:3], -1
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_swap_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_umax s5, s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_umax_x2 s[10:11], s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_umin s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_umin_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_xor s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_xor_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_bitreplicate_b64_b32 exec, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_add s5, s[4:7], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_add_x2 s[10:11], s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_and s101, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_and_x2 s[10:11], s[8:11], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_dec s5, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_inc s101, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_or s5, s[8:11], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_or_x2 s[10:11], s[96:99], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_smax s5, s[4:7], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_smin s5, s[4:7], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_sub s5, s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_swap s5, s[4:7], -1
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_umax s5, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_umin s5, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_xor s5, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_store_dword exec_hi, s[0:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_store_dwordx2 exec, s[0:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_store_dwordx4 s[4:7], s[12:15], m0
+// CHECK: error: instruction not supported on this GPU
+
+s_call_b64 exec, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_clause 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_cmp_eq_u64 -1, s[4:5]
+// CHECK: error: instruction not supported on this GPU
+
+s_cmp_lg_u64 -1, s[4:5]
+// CHECK: error: instruction not supported on this GPU
+
+s_code_end
+// CHECK: error: instruction not supported on this GPU
+
+s_dcache_discard s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_dcache_discard_x2 s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_dcache_wb
+// CHECK: error: instruction not supported on this GPU
+
+s_dcache_wb_vol
+// CHECK: error: instruction not supported on this GPU
+
+s_denorm_mode 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_endpgm_ordered_ps_done
+// CHECK: error: instruction not supported on this GPU
+
+s_endpgm_saved
+// CHECK: error: instruction not supported on this GPU
+
+s_get_waveid_in_workgroup s0
+// CHECK: error: instruction not supported on this GPU
+
+s_gl1_inv
+// CHECK: error: instruction not supported on this GPU
+
+s_inst_prefetch 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_lshl1_add_u32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_lshl2_add_u32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_lshl3_add_u32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_lshl4_add_u32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_memrealtime exec
+// CHECK: error: instruction not supported on this GPU
+
+s_movrelsd_2_b32 s0, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_mul_hi_i32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_mul_hi_u32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_nand_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_nor_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_or_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_orn1_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_orn1_saveexec_b64 exec, s[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+s_orn2_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_pack_hh_b32_b16 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_pack_lh_b32_b16 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_pack_ll_b32_b16 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_rfe_restore_b64 -1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_round_mode 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_load_dword s5, s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_load_dwordx2 s[100:101], s[2:3], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_load_dwordx4 s[20:23], s[4:5], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_store_dword s1, s[4:5], 0x123 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_set_gpr_idx_idx -1
+// CHECK: error: instruction not supported on this GPU
+
+s_set_gpr_idx_mode 0
+// CHECK: error: instruction not supported on this GPU
+
+s_set_gpr_idx_off
+// CHECK: error: instruction not supported on this GPU
+
+s_set_gpr_idx_on -1, 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_store_dword exec_hi, s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_store_dwordx2 exec, s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_store_dwordx4 s[4:7], flat_scratch, m0
+// CHECK: error: instruction not supported on this GPU
+
+s_subvector_loop_begin exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_subvector_loop_end exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_ttracedata_imm 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_version 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_waitcnt_expcnt exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_waitcnt_lgkmcnt exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_waitcnt_vmcnt exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_waitcnt_vscnt exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_wakeup
+// CHECK: error: instruction not supported on this GPU
+
+s_xnor_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_xor_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_dword v0, v1, off offset:-2048 glc slc
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_dwordx2 v[1:2], v3, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_dwordx3 v[1:3], v4, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_dwordx4 v[1:4], v5, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_sbyte v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_sbyte_d16 v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_sbyte_d16_hi v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_short_d16 v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_short_d16_hi v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_sshort v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_ubyte v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_ubyte_d16 v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_ubyte_d16_hi v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_ushort v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_byte off, v2, flat_scratch_hi offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_byte_d16_hi off, v2, flat_scratch_hi offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_dword off, v2, exec_hi
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_dwordx2 off, v[254:255], s3 offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_dwordx3 off, v[253:255], s3 offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_dwordx4 off, v[252:255], s3 offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_short off, v2, flat_scratch_hi offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_short_d16_hi off, v2, flat_scratch_hi offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+tbuffer_load_format_d16_x v0, off, s[0:3]
+// CHECK: error: instruction not supported on this GPU
+
+tbuffer_load_format_d16_xy v0, off, s[0:3], format:22, 0
+// CHECK: error: instruction not supported on this GPU
+
+tbuffer_load_format_d16_xyz v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
+// CHECK: error: instruction not supported on this GPU
+
+tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], format:22, 0
+// CHECK: error: instruction not supported on this GPU
+
+tbuffer_store_format_d16_x v0, v1, s[4:7], format:33, 0 idxen
+// CHECK: error: instruction not supported on this GPU
+
+tbuffer_store_format_d16_xy v0, v1, s[4:7], format:33, 0 idxen
+// CHECK: error: instruction not supported on this GPU
+
+tbuffer_store_format_d16_xyz v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
+// CHECK: error: instruction not supported on this GPU
+
+tbuffer_store_format_d16_xyzw v[0:1], v2, s[4:7], format:33, 0 idxen
+// CHECK: error: instruction not supported on this GPU
+
+v_accvgpr_read_b32 a0, a0
+// CHECK: error: instruction not supported on this GPU
+
+v_accvgpr_write_b32 a0, 65
+// CHECK: error: instruction not supported on this GPU
+
+v_add3_u32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_ci_u32_e32 v255, vcc, v1, v2, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_ci_u32_e64 v255, s12, v1, v2, s6
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_add_f16 v0, s[0:1], v0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_f16_e32 v1, 64.0, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_f16_e64 v0, 0x3456, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_add_i16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_lshl_u32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_i16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_i32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_u16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_u32_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_u32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_add_u16 v0, (i1+100)*2, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_u16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_u16_sdwa v0, scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_add_u32 v0, execz, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_u32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_u32_e32 v1, s1, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_add_u32_e64 v0, scc, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_addc_co_u32 v0, vcc, shared_base, v0, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_addc_co_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_addc_co_u32_e32 v3, vcc, 12345, v3, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_addc_co_u32_e64 v255, s[12:13], v1, v2, s[6:7]
+// CHECK: error: instruction not supported on this GPU
+
+v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_and_or_b32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_ashrrev_i16 v0, lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_ashrrev_i16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_ashrrev_i16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_ashrrev_i16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_ashrrev_i64 v[0:1], 0x100, s[0:1]
+// CHECK: error: instruction not supported on this GPU
+
+v_ceil_f16 v0, -0.5
+// CHECK: error: instruction not supported on this GPU
+
+v_ceil_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_ceil_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_ceil_f16_e64 v0, -|v1|
+// CHECK: error: instruction not supported on this GPU
+
+v_ceil_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_class_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_class_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_class_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_eq_f16 vcc, -1, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_eq_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_eq_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_eq_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_eq_i16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_eq_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_eq_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_eq_u16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_eq_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_f_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_f_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_f_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_f_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_f_i16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_f_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_f_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_f_u16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_f_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ge_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ge_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ge_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ge_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ge_i16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ge_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ge_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ge_u16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ge_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_gt_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_gt_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_gt_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_gt_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_gt_i16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_gt_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_gt_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_gt_u16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_gt_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_le_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_le_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_le_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_le_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_le_i16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_le_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_le_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_le_u16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_le_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lg_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lg_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lg_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lt_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lt_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lt_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lt_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lt_i16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lt_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lt_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lt_u16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_lt_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ne_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ne_i16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ne_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ne_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ne_u16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ne_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_neq_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_neq_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_neq_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nge_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nge_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nge_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ngt_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ngt_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_ngt_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nle_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nle_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nle_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nlg_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nlg_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nlg_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nlt_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nlt_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_nlt_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_o_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_o_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_o_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_t_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_t_i16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_t_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_t_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_t_u16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_t_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_tru_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_tru_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_tru_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_u_f16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_u_f16_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmp_u_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_class_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_class_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_class_f16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_eq_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_eq_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_eq_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_eq_i16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_eq_i16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_eq_i16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_eq_u16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_eq_u16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_eq_u16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_i16_e64 exec, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_u16_e64 exec, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_f_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ge_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ge_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ge_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ge_i16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ge_i16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ge_i16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ge_u16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ge_u16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ge_u16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_gt_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_gt_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_gt_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_gt_i16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_gt_i16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_gt_i16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_gt_u16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_gt_u16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_gt_u16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_le_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_le_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_le_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_le_i16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_le_i16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_le_i16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_le_u16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_le_u16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_le_u16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lg_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lg_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lg_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lt_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lt_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lt_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lt_i16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lt_i16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lt_i16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lt_u16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lt_u16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_lt_u16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ne_i16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ne_i16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ne_i16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ne_u16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ne_u16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ne_u16_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_neq_f16 -1, v2
+// CHECK: error: instruction not supported on this
+v_cmpx_neq_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_neq_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nge_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nge_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nge_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ngt_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ngt_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_ngt_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nle_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nle_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nle_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nlg_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nlg_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nlg_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nlt_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nlt_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_nlt_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_o_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_o_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_o_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_i16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_i16_e64 exec, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_i16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_u16 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_u16_e64 exec, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_t_u16_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_tru_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_tru_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_tru_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_u_f16 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_u_f16_e64 -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpx_u_f16_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cos_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_cos_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_cos_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cos_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cos_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_f16_i16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_f16_i16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_f16_i16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_f16_i16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_f16_i16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_f16_u16 v0, src_lds_direct
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_f16_u16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_f16_u16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_f16_u16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_f16_u16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_i16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_i16_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_i16_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_i16_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_i16_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_i16_f16 v5, -4.0
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_i16_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_i16_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_i16_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_i16_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_u16_f16 v5, s101
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_u16_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_u16_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_u16_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_u16_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_pknorm_i16_f16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_pknorm_u16_f16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_u16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_u16_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_u16_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_u16_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_u16_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_div_fixup_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_div_fixup_f16_e64 v5, 0.5, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_div_fixup_legacy_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_div_fixup_legacy_f16_e64 v5, 0.5, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v0, -v1, -v2, -v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2_i32_i16 v0, -v1, -v2, -v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2_u32_u16 v0, -v1, -v2, -v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_f32_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_f32_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_f32_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_i32_i16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_i32_i16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4_i32_i8 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4_u32_u8 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4c_i32_i8 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4c_i32_i8_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4c_i32_i8_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8_i32_i4 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8_u32_u4 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8c_i32_i4 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8c_i32_i4_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_exp_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_exp_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_exp_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_exp_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_exp_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_floor_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_floor_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_floor_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_floor_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_floor_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_f16_e64 v5, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_legacy_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_legacy_f16_e64 v5, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_mix_f32 v0, -abs(v1), v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3)
+// CHECK: error: instruction not supported on this GPU
+
+v_fmaak_f32 v255, v1, v2, 0x1121
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f16 v5, 0x1234, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f32 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f32_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmamk_f32 v255, v1, 0x1121, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_fract_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_fract_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_fract_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_fract_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_fract_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_frexp_exp_i16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_frexp_exp_i16_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_frexp_exp_i16_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_frexp_exp_i16_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_frexp_exp_i16_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_frexp_mant_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_frexp_mant_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_frexp_mant_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_frexp_mant_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_frexp_mant_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_interp_p1ll_f16 v255, v2, attr0.x
+// CHECK: error: instruction not supported on this GPU
+
+v_interp_p1lv_f16 v255, v2, attr0.x, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_interp_p2_f16 v255, v2, attr0.x, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_interp_p2_legacy_f16 v255, v2, attr0.x, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_ldexp_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_ldexp_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_ldexp_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_ldexp_f16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_ldexp_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_log_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_log_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_log_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_log_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_log_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_lshl_add_u32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_lshl_or_b32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_lshlrev_b16 v0, lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_lshlrev_b16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_lshlrev_b16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshlrev_b16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_lshlrev_b64 v[254:255], v1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_lshrrev_b16 v0, lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_lshrrev_b16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_lshrrev_b16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshrrev_b16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_lshrrev_b64 v[254:255], v1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_f16_e64 v0, -4.0, flat_scratch_lo
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_f16_e64 v5, 0.5, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_i16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_i16_e64 v5, -1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_i32_i16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_f16_e64 v5, 0.5, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_i16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_i16_e64 v5, 0, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_u16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_u16_e64 v5, 0, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_mix_f32 v0, -abs(v1), v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3)
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_u16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_u16_e64 v5, -1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_u32_u16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_madak_f16 v0, src_lds_direct, v0, 0x1121
+// CHECK: error: instruction not supported on this GPU
+
+v_madmk_f16 v0, src_lds_direct, 0x1121, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_max3_f16 v0, src_lds_direct, v0, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_max3_i16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_max3_u16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_max_f16 v0, execz, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_max_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_max_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_max_f16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_max_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_max_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_max_i16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_max_i16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_max_i16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_max_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_max_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_max_u16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_max_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_med3_f16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_med3_i16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_med3_u16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x1f32 a[0:31], 1, v1, a[1:32]
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_min3_f16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_min3_i16 v0, src_lds_direct, v0, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_min3_u16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_min_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_min_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_min_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_min_f16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_min_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_min_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_min_i16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_min_i16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_min_i16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_min_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_min_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_min_u16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_min_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_movrelsd_2_b32 v0, v255 dpp8:[7,6,5,4,3,2,1,0]
+// CHECK: error: instruction not supported on this GPU
+
+v_movrelsd_2_b32_dpp v0, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_movrelsd_2_b32_e32 v5, 1
+// CHECK: error: instruction not supported on this GPU
+
+v_movrelsd_2_b32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_movrelsd_2_b32_sdwa v0, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_mul_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_mul_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_mul_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_mul_f16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_mul_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_mul_lo_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_mul_lo_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_mul_lo_u16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_mul_lo_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_or3_b32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_pack_b32_f16 v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_perm_b32 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_permlane16_b32 v0, lds_direct, s0, s0
+// CHECK: error: instruction not supported on this GPU
+
+v_permlanex16_b32 v0, lds_direct, s0, s0
+// CHECK: error: instruction not supported on this GPU
+
+v_pipeflush
+// CHECK: error: instruction not supported on this GPU
+
+v_pipeflush_e64
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_add_f16 v0, execz, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_add_i16 v0, src_lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_add_u16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_ashrrev_i16 v0, lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_fma_f16 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_fmac_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_lshlrev_b16 v0, lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_lshrrev_b16 v0, lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_mad_i16 v0, src_lds_direct, v0, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_mad_u16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_max_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_max_i16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_max_u16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_min_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_min_i16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_min_u16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_mul_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_mul_lo_u16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_sub_i16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_sub_u16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_rndne_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_rndne_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_rndne_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rndne_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rndne_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_sat_pk_u8_i16 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_sat_pk_u8_i16_e64 v5, -1
+// CHECK: error: instruction not supported on this GPU
+
+v_sat_pk_u8_i16_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_screen_partition_4se_b32 v5, -1
+// CHECK: error: instruction not supported on this GPU
+
+v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0
+// CHECK: error: instruction not supported on this GPU
+
+v_screen_partition_4se_b32_e64 v5, -1
+// CHECK: error: instruction not supported on this GPU
+
+v_screen_partition_4se_b32_sdwa v5, v1 src0_sel:BYTE_0
+// CHECK: error: instruction not supported on this GPU
+
+v_sin_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_sin_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_sin_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_sin_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_sin_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_sqrt_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_sqrt_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_sqrt_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_sqrt_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_sqrt_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_co_ci_u32_e32 v255, vcc, v1, v2, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_co_ci_u32_e64 v255, s12, v1, v2, s6
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_f16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_i16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_nc_i16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_nc_i32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_nc_u16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_nc_u32_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_nc_u32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_nc_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u32 v1, 4.0, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u32_e32 v1, s1, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u32_e64 v255, s[12:13], v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_co_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_co_u32_e64 v255, s[12:13], v1, v2, s[6:7]
+// CHECK: error: instruction not supported on this GPU
+
+v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_co_u32 v0, vcc, src_lds_direct, v0, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_co_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_co_u32_e64 v255, s[12:13], v1, v2, s[6:7]
+// CHECK: error: instruction not supported on this GPU
+
+v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_co_ci_u32 v0, vcc_lo, src_lds_direct, v0, vcc_lo
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0]
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_co_ci_u32_e32 v1, 0, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_co_ci_u32_e64 v255, s12, v1, v2, s6
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_f16 v0, src_lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_f16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_nc_u32 v0, src_lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_nc_u32_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_nc_u32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_nc_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u16 v0, src_lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u32 v0, src_lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u32_e32 v1, s1, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u32_e64 v255, s[12:13], v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_swap_b32 v1, 1
+// CHECK: error: instruction not supported on this GPU
+
+v_swap_b32_e32 v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_swaprel_b32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_trunc_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+// CHECK: error: instruction not supported on this GPU
+
+v_trunc_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_trunc_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_trunc_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_trunc_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_xad_u32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_xnor_b32 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_xnor_b32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_xnor_b32_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_xnor_b32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_xnor_b32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_xor3_b32 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+//===----------------------------------------------------------------------===//
+// Unsupported e64 variants.
+//===----------------------------------------------------------------------===//
+
+v_interp_mov_f32_e64 v255, p10, attr0.x
+// CHECK: error: e64 variant of this instruction is not supported
+
+v_interp_p1_f32_e64 v255, v2, attr0.x
+// CHECK: error: e64 variant of this instruction is not supported
+
+v_interp_p2_f32_e64 v255, v2, attr0.x
+// CHECK: error: e64 variant of this instruction is not supported
+
+//===----------------------------------------------------------------------===//
+// Unsupported dpp variants.
+//===----------------------------------------------------------------------===//
+
+v_add_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_addc_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_and_b32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_ashrrev_i32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_bfrev_b32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_ceil_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cos_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_f16_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_f32_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_f32_i32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_f32_u32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_f32_ubyte0_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_f32_ubyte1_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_f32_ubyte2_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_f32_ubyte3_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_flr_i32_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_i32_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_off_f32_i4_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_rpi_i32_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_cvt_u32_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_exp_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_exp_legacy_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_ffbh_i32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_ffbh_u32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_ffbl_b32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_floor_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_fract_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_frexp_exp_i32_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_frexp_mant_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_log_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_log_legacy_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_lshlrev_b32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_lshrrev_b32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_mac_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_max_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_max_i32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_max_u32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_min_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_min_i32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_min_u32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_mov_b32_dpp v0, v1 row_bcast:15 row_mask:0x1 bank_mask:0x1
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_movreld_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_movrels_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_movrelsd_b32_dpp v0, v255 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_mul_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_mul_hi_i32_i24_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_mul_hi_u32_u24_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_mul_i32_i24_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_mul_legacy_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_mul_u32_u24_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_not_b32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_or_b32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_rcp_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_rcp_iflag_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_rndne_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_rsq_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_sin_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_sqrt_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_sub_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_subb_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_subbrev_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_subrev_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_trunc_f32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+v_xor_b32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: dpp variant of this instruction is not supported
+
+//===----------------------------------------------------------------------===//
+// Unsupported sdwa variants.
+//===----------------------------------------------------------------------===// + +v_add_f32_sdwa v0, v0, v0 dst_unused:UNUSED_PAD src0_sel:WORD_1 +// CHECK: error: sdwa variant of this instruction is not supported + +v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +// CHECK: error: sdwa variant of this instruction is not supported + +v_and_b32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_ashrrev_i32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_bfrev_b32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_ceil_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_class_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_eq_f32_sdwa exec, s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_eq_i32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_eq_u32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_f_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_f_i32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_f_u32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_ge_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_ge_i32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_ge_u32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_gt_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_gt_i32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_gt_u32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_le_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_le_i32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_le_u32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_lg_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_cmp_lt_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + 
+v_cmp_lt_i32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_lt_u32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_ne_i32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_ne_u32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_neq_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_nge_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_ngt_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_nle_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_nlg_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_nlt_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_o_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_t_i32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_t_u32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_tru_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmp_u_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_class_f32_sdwa flat_scratch, v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_eq_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_eq_i32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_eq_u32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_f_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_f_i32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_f_u32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_ge_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_ge_i32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_ge_u32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_gt_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_gt_i32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_gt_u32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_le_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_le_i32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_le_u32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_lg_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_lt_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_lt_i32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_lt_u32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_ne_i32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_ne_u32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_neq_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_nge_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_ngt_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_nle_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_nlg_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_nlt_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_o_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_t_i32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_t_u32_sdwa exec_hi, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_tru_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cmpx_u_f32_sdwa -v1, v2 src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cos_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_f16_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_f32_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_f32_i32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_f32_u32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_f32_ubyte0_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_f32_ubyte1_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_f32_ubyte2_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_f32_ubyte3_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_flr_i32_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_i32_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_off_f32_i4_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_rpi_i32_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_cvt_u32_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_exp_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_exp_legacy_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_ffbh_i32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_ffbh_u32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_ffbl_b32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_floor_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_fract_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_frexp_exp_i32_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_frexp_mant_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_log_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_log_legacy_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_lshlrev_b32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_lshrrev_b32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_mac_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_max_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_max_i32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_max_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_min_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_min_i32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_min_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_mov_b32_sdwa v1, sext(-2+i1)
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_movreld_b32_sdwa v0, 64 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_movrels_b32_sdwa v0, 1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_movrelsd_b32_sdwa v0, 1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_mul_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_mul_hi_i32_i24_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_mul_hi_u32_u24_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_mul_i32_i24_sdwa v1, v2, v3 clamp
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_mul_legacy_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_mul_u32_u24_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_nop_sdwa
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_not_b32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_or_b32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_rcp_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_rcp_iflag_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_rndne_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_rsq_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_sin_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_sqrt_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_sub_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_subrev_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_trunc_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
+
+v_xor_b32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: sdwa variant of this instruction is not supported
diff --git a/llvm/test/MC/AMDGPU/gfx8_unsupported.s b/llvm/test/MC/AMDGPU/gfx8_unsupported.s
new file mode 100644
index 0000000000000..47f0f35c4d841
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx8_unsupported.s
@@ -0,0 +1,1814 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck --implicit-check-not=error: %s
+
+//===----------------------------------------------------------------------===//
+// Unsupported instructions.
+//===----------------------------------------------------------------------===//
+
+buffer_atomic_add_f32 v255, off, s[8:11], s3 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc
+// CHECK: error: instruction not supported on this GPU
+
+buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_atomic_fmin v0, off, s[0:3], s0
+// CHECK: error: instruction not supported on this GPU
+
+buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc
+// CHECK: error: instruction not supported on this GPU
+
+buffer_atomic_pk_add_f16 v255, off, s[8:11], s3 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_gl0_inv
+// CHECK: error: instruction not supported on this GPU
+
+buffer_gl1_inv
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_format_d16_hi_x v5, off, s[8:11], s3
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_sbyte_d16 v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_sbyte_d16_hi v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_short_d16 v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_short_d16_hi v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_ubyte_d16 v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_load_ubyte_d16_hi v1, off, s[4:7], s1
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_byte_d16_hi v1, off, s[12:15], -1 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 glc
+// CHECK: error: instruction not supported on this GPU
+
+buffer_store_short_d16_hi v1, off, s[12:15], -1 offset:4095
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_addtid_b32 v255 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_i8_d16 v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_i8_d16_hi v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_u16_d16 v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_u16_d16_hi v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_u8_d16 v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_read_u8_d16_hi v255, v1 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_write_addtid_b32 v255 offset:65535
+// CHECK: error: instruction not supported on this GPU
+
+ds_write_b16_d16_hi v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+ds_write_b8_d16_hi v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+flat_atomic_fcmpswap v0, v[1:2], v[2:3] glc
+// CHECK: error: instruction not supported on this GPU
+
+flat_atomic_fcmpswap_x2 v[0:1], v[1:2], v[2:5] glc
+// CHECK: error: instruction not supported on this GPU
+
+flat_atomic_fmax v0, v[1:2], v2 glc
+// CHECK: error: instruction not supported on this GPU
+
+flat_atomic_fmax_x2 v[0:1], v[1:2], v[2:3] glc
+// CHECK: error: instruction not supported on this GPU
+
+flat_atomic_fmin v0, v[1:2], v2 glc
+// CHECK: error: instruction not supported on this GPU
+
+flat_atomic_fmin_x2 v[0:1], v[1:2], v[2:3] glc
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_sbyte_d16 v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_sbyte_d16_hi v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_short_d16 v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_short_d16_hi v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_ubyte_d16 v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_load_ubyte_d16_hi v1, v[3:4]
+// CHECK: error: instruction not supported on this GPU
+
+flat_store_byte_d16_hi v[1:2], v2
+// CHECK: error: instruction not supported on this GPU
+
+flat_store_short_d16_hi v[1:2], v2
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_add v0, v[1:2], v2, off glc slc
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_add_f32 v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_add_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_and v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_and_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_cmpswap v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_cmpswap_x2 v[1:2], v[252:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_dec v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_dec_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_inc v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_inc_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_or v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_or_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_pk_add_f16 v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_smax v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_smax_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_smin v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_smin_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_sub v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_sub_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_swap v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_swap_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_umax v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_umax_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_umin v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_umin_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_xor v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_atomic_xor_x2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_load_dword v1, v3, s[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+global_load_dwordx2 v[1:2], v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_dwordx3 v[1:3], v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_dwordx4 v[1:4], v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_sbyte v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_sbyte_d16 v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_sbyte_d16_hi v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_short_d16 v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_short_d16_hi v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_sshort v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_ubyte v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_ubyte_d16 v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_ubyte_d16_hi v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_load_ushort v1, v[3:4], off
+// CHECK: error: instruction not supported on this GPU
+
+global_store_byte v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_store_byte_d16_hi v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_store_dword v254, v1, s[2:3] offset:16
+// CHECK: error: instruction not supported on this GPU
+
+global_store_dwordx2 v[1:2], v[254:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_store_dwordx3 v[1:2], v[253:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_store_dwordx4 v[1:2], v[252:255], off offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+global_store_short v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+global_store_short_d16_hi v[1:2], v2, off
+// CHECK: error: instruction not supported on this GPU
+
+s_and_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_andn1_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_andn1_saveexec_b64 exec, s[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+s_andn1_wrexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_andn1_wrexec_b64 exec, s[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+s_andn2_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_andn2_wrexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_andn2_wrexec_b64 exec, s[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_add s5, s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_add_x2 s[10:11], s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_and s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_and_x2 s[10:11], s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_cmpswap s[10:11], s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_dec s5, s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_dec_x2 s[10:11], s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_inc s5, s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_inc_x2 s[10:11], s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_or s5, s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_or_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_smax s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_smax_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_smin s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_smin_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_sub s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_sub_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_swap s5, s[2:3], -1
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_swap_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_umax s5, s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_umax_x2 s[10:11], s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_umin s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_umin_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_xor s5, s[2:3], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_atomic_xor_x2 s[10:11], s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_bitreplicate_b64_b32 exec, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_add s5, s[4:7], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_add_x2 s[10:11], s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_and s101, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_and_x2 s[10:11], s[8:11], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_dec s5, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_inc s101, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_or s5, s[8:11], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_or_x2 s[10:11], s[96:99], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_smax s5, s[4:7], s101
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_smin s5, s[4:7], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_sub s5, s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_swap s5, s[4:7], -1
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_umax s5, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_umin s5, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_xor s5, s[4:7], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_call_b64 exec, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_clause 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_code_end
+// CHECK: error: instruction not supported on this GPU
+
+s_dcache_discard s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_dcache_discard_x2 s[2:3], 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_denorm_mode 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_endpgm_ordered_ps_done
+// CHECK: error: instruction not supported on this GPU
+
+s_get_waveid_in_workgroup s0
+// CHECK: error: instruction not supported on this GPU
+
+s_gl1_inv
+// CHECK: error: instruction not supported on this GPU
+
+s_inst_prefetch 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_lshl1_add_u32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_lshl2_add_u32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_lshl3_add_u32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_lshl4_add_u32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_movrelsd_2_b32 s0, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_mul_hi_i32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_mul_hi_u32 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_nand_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_nor_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_or_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_orn1_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_orn1_saveexec_b64 exec, s[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+s_orn2_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_pack_hh_b32_b16 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_pack_lh_b32_b16 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_pack_ll_b32_b16 exec_hi, s1, s2
+// CHECK: error: instruction not supported on this GPU
+
+s_round_mode 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_load_dword s5, s[2:3], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_load_dwordx2 s[100:101], s[2:3], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_load_dwordx4 s[20:23], s[4:5], s0
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_store_dword s1, s[4:5], 0x123 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc
+// CHECK: error: instruction not supported on this GPU
+
+s_subvector_loop_begin exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_subvector_loop_end exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_ttracedata_imm 0x0
+// CHECK: error: instruction not supported on this GPU
+
+s_version 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_waitcnt_expcnt exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_waitcnt_lgkmcnt exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_waitcnt_vmcnt exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_waitcnt_vscnt exec_hi, 0x1234
+// CHECK: error: instruction not supported on this GPU
+
+s_xnor_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+s_xor_saveexec_b32 exec_hi, s1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_dword v0, v1, off offset:-2048 glc slc
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_dwordx2 v[1:2], v3, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_dwordx3 v[1:3], v4, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_dwordx4 v[1:4], v5, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_sbyte v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_sbyte_d16 v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_sbyte_d16_hi v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_short_d16 v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_short_d16_hi v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_sshort v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_ubyte v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_ubyte_d16 v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_ubyte_d16_hi v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_load_ushort v1, v2, off
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_byte off, v2, flat_scratch_hi offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_byte_d16_hi off, v2, flat_scratch_hi offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_dword off, v2, exec_hi
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_dwordx2 off, v[254:255], s3 offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_dwordx3 off, v[253:255], s3 offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_dwordx4 off, v[252:255], s3 offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_short off, v2, flat_scratch_hi offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+scratch_store_short_d16_hi off, v2, flat_scratch_hi offset:-1
+// CHECK: error: instruction not supported on this GPU
+
+v_accvgpr_read_b32 a0, a0
+// CHECK: error: instruction not supported on this GPU
+
+v_accvgpr_write_b32 a0, 65
+// CHECK: error: instruction not supported on this GPU
+
+v_add3_u32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_ci_u32_e32 v255, vcc, v1, v2, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_ci_u32_e64 v255, s12, v1, v2, s6
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_u32 v0, exec, v0, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_u32_dpp v255, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_u32_e32 v2, vcc, s0, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_u32_e64 v0, s0, v0, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_i16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_i32 lds_direct, v0, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_i32_e32 v0, vcc, 0.5, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_add_i32_e64 v1, s[0:1], v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_add_lshl_u32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_i16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_i32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_u16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_u32_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_u32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_add_nc_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_addc_co_u32 v0, vcc, shared_base, v0, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_addc_co_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_addc_co_u32_e32 v3, vcc, 12345, v3, vcc
+// CHECK: error: instruction not supported on this GPU
+
+v_addc_co_u32_e64 v255, s[12:13], v1, v2, s[6:7]
+// CHECK: error: instruction not supported on this GPU
+
+v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// CHECK: error: instruction not supported on this GPU
+
+v_and_or_b32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_ashr_i32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_ashr_i32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_ashr_i64 v[254:255], v[1:2], v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_eq_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_eq_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_eq_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_eq_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_f_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_f_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_f_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_f_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_ge_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_ge_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_ge_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_ge_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_gt_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_gt_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_gt_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_gt_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_le_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_le_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_le_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_le_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_lg_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_lg_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_lg_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_lg_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_lt_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_lt_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_lt_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_lt_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_neq_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_neq_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_neq_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_neq_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nge_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nge_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nge_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nge_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_ngt_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_ngt_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_ngt_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_ngt_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nle_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nle_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nle_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nle_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nlg_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nlg_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nlg_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nlg_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nlt_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nlt_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nlt_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_nlt_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_o_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_o_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_o_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_o_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_tru_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_tru_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_tru_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_tru_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_u_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_u_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_u_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmps_u_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_eq_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_eq_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_eq_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_eq_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_f_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_f_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_f_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_f_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_ge_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_ge_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_ge_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_ge_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_gt_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_gt_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_gt_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_gt_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_le_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_le_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_le_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_le_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_lg_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_lg_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_lg_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_lg_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_lt_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_lt_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_lt_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_lt_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_neq_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_neq_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_neq_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_neq_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nge_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nge_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nge_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nge_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_ngt_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_ngt_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_ngt_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_ngt_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nle_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nle_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nle_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nle_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nlg_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nlg_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nlg_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nlg_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nlt_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nlt_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nlt_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_nlt_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_o_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_o_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_o_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_o_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_tru_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_tru_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_tru_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_tru_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_u_f32 vcc, -1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_u_f32_e64 flat_scratch, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_u_f64 vcc, -1, v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cmpsx_u_f64_e64 flat_scratch, v[1:2], v[2:3]
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_i16_f16 v5, -4.0
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_i16_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_i16_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_i16_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_i16_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_u16_f16 v5, s101
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_u16_f16_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_u16_f16_e32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_u16_f16_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_norm_u16_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_pknorm_i16_f16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_cvt_pknorm_u16_f16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_div_fixup_legacy_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_div_fixup_legacy_f16_e64 v5, 0.5, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v0, -v1, -v2, -v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2_i32_i16 v0, -v1, -v2, -v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2_u32_u16 v0, -v1, -v2, -v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_f32_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_f32_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_f32_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_i32_i16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot2c_i32_i16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4_i32_i8 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4_u32_u8 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4c_i32_i8 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4c_i32_i8_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_dot4c_i32_i8_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8_i32_i4 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8_u32_u4 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8c_i32_i4 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_dot8c_i32_i4_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_legacy_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_legacy_f16_e64 v5, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_mix_f32 v0, -abs(v1), v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
+// CHECK: error: instruction not supported on this GPU
+
+v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3)
+// CHECK: error: instruction not supported on this GPU
+
+v_fmaak_f32 v255, v1, v2, 0x1121
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f16 v5, 0x1234, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f16_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f16_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f32 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f32_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmac_f32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_fmamk_f32 v255, v1, 0x1121, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_interp_p2_legacy_f16 v255, v2, attr0.x, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_log_clamp_f32 v1, 0.5
+// CHECK: error: instruction not supported on this GPU
+
+v_log_clamp_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_lshl_add_u32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_lshl_b32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshl_b32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshl_b64 v[254:255], v[1:2], v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshl_or_b32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_lshr_b32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshr_b32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_lshr_b64 v[254:255], v[1:2], v2
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_legacy_f32 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_legacy_f32_e32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_mac_legacy_f32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_i32_i16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_f16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_f16_e64 v5, 0.5, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_i16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_i16_e64 v5, 0, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_u16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_legacy_u16_e64 v5, 0, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_mix_f32 v0, -abs(v1), v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3)
+// CHECK: error: instruction not supported on this GPU
+
+v_mad_u32_u16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_max3_f16 v0, src_lds_direct, v0, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_max3_i16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_max3_u16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_max_legacy_f32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_max_legacy_f32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_med3_f16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_med3_i16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_med3_u16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x1f32 a[0:31], 1, v1, a[1:32]
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2
+// CHECK: error: instruction not supported on this GPU
+
+v_min3_f16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_min3_i16 v0, src_lds_direct, v0, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_min3_u16 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_min_legacy_f32 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_min_legacy_f32_e64 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_movrelsd_2_b32 v0, v255 dpp8:[7,6,5,4,3,2,1,0]
+// CHECK: error: instruction not supported on this GPU
+
+v_movrelsd_2_b32_dpp v0, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
+// CHECK: error: instruction not supported on this GPU
+
+v_movrelsd_2_b32_e32 v5, 1
+// CHECK: error: instruction not supported on this GPU
+
+v_movrelsd_2_b32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_movrelsd_2_b32_sdwa v0, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
+// CHECK: error: instruction not supported on this GPU
+
+v_mullit_f32 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_or3_b32 v1, v2, v3, v4
+// CHECK: error: instruction not supported on this GPU
+
+v_pack_b32_f16 v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_permlane16_b32 v0, lds_direct, s0, s0
+// CHECK: error: instruction not supported on this GPU
+
+v_permlanex16_b32 v0, lds_direct, s0, s0
+// CHECK: error: instruction not supported on this GPU
+
+v_pipeflush
+// CHECK: error: instruction not supported on this GPU
+
+v_pipeflush_e64
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_add_f16 v0, execz, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_add_i16 v0, src_lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_add_u16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_ashrrev_i16 v0, lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_fma_f16 v0, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_fmac_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_lshlrev_b16 v0, lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_lshrrev_b16 v0, lds_direct, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_mad_i16 v0, src_lds_direct, v0, v0
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_mad_u16 v255, v1, v2, v3
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_max_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_max_i16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_max_u16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_min_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_min_i16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_min_u16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_mul_f16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_mul_lo_u16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_sub_i16 v0, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_pk_sub_u16 v255, v1, v2
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_clamp_f32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_clamp_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_clamp_f64 v[254:255], v[1:2]
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_clamp_f64_e64 v[254:255], v[1:2]
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_legacy_f32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rcp_legacy_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_clamp_f32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_clamp_f32_e64 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_clamp_f64 v[254:255], v[1:2]
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_clamp_f64_e64 v[254:255], v[1:2]
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_legacy_f32 v255, v1
+// CHECK: error: instruction not supported on this GPU
+
+v_rsq_legacy_f32_e64 v255, v1
+// CHECK: error:
instruction not supported on this GPU + +v_sat_pk_u8_i16 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_sat_pk_u8_i16_e64 v5, -1 +// CHECK: error: instruction not supported on this GPU + +v_sat_pk_u8_i16_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_screen_partition_4se_b32 v5, -1 +// CHECK: error: instruction not supported on this GPU + +v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// CHECK: error: instruction not supported on this GPU + +v_screen_partition_4se_b32_e64 v5, -1 +// CHECK: error: instruction not supported on this GPU + +v_screen_partition_4se_b32_sdwa v5, v1 src0_sel:BYTE_0 +// CHECK: error: instruction not supported on this GPU + +v_sub_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// CHECK: error: instruction not supported on this GPU + +v_sub_co_ci_u32_e32 v255, vcc, v1, v2, vcc +// CHECK: error: instruction not supported on this GPU + +v_sub_co_ci_u32_e64 v255, s12, v1, v2, s6 +// CHECK: error: instruction not supported on this GPU + +v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_sub_co_u32 v0, s0, v0, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_co_u32_dpp v255, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_sub_co_u32_e32 v2, vcc, s0, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_co_u32_e64 v0, s0, v0, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +// CHECK: error: instruction not supported on this GPU + +v_sub_i16 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_i32 v1, s[0:1], v2, v3 +// CHECK: error: instruction not supported on this GPU + +v_sub_i32_e64 v255, s[12:13], v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_i16 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_i32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_u16 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_u32_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_u32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_subb_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 +// CHECK: error: instruction not supported on this GPU + +v_subb_co_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_subb_co_u32_e64 v255, s[12:13], v1, v2, s[6:7] +// CHECK: error: instruction not supported on this GPU + +v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +// CHECK: error: instruction not supported on this GPU + +v_subbrev_co_u32 
v0, vcc, src_lds_direct, v0, vcc +// CHECK: error: instruction not supported on this GPU + +v_subbrev_co_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_subbrev_co_u32_e64 v255, s[12:13], v1, v2, s[6:7] +// CHECK: error: instruction not supported on this GPU + +v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_ci_u32 v0, vcc_lo, src_lds_direct, v0, vcc_lo +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_ci_u32_e32 v1, 0, v1 +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_ci_u32_e64 v255, s12, v1, v2, s6 +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_u32 v0, s0, src_lds_direct, v0 +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_u32_dpp v255, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_u32_e32 v2, vcc, s0, v2 +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_u32_e64 v0, s0, v0, v2 +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +// CHECK: error: instruction not supported on this GPU + +v_subrev_i32 v1, s[0:1], v2, v3 +// CHECK: error: instruction not supported on this GPU + +v_subrev_i32_e64 v255, s[12:13], v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_subrev_nc_u32 v0, src_lds_direct, v0 +// CHECK: error: instruction not supported on this GPU + +v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// CHECK: error: instruction not supported on this GPU + +v_subrev_nc_u32_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_subrev_nc_u32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_subrev_nc_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_swap_b32 v1, 1 +// CHECK: error: instruction not supported on this GPU + +v_swap_b32_e32 v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_swaprel_b32 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_xad_u32 v1, v2, v3, v4 +// CHECK: error: instruction not supported on this GPU + +v_xnor_b32 v0, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_xnor_b32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_xnor_b32_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_xnor_b32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_xnor_b32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_xor3_b32 v255, v1, v2, v3 +// CHECK: error: instruction not supported on this GPU + +//===----------------------------------------------------------------------===// +// Unsupported e32 variants. 
+//===----------------------------------------------------------------------===// + +v_cvt_pkrtz_f16_f32_e32 v255, v1, v2 +// CHECK: error: e32 variant of this instruction is not supported + +//===----------------------------------------------------------------------===// +// Unsupported dpp variants. +//===----------------------------------------------------------------------===// + +v_movreld_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// CHECK: error: dpp variant of this instruction is not supported + +v_movrels_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// CHECK: error: dpp variant of this instruction is not supported + +v_movrelsd_b32_dpp v0, v255 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// CHECK: error: dpp variant of this instruction is not supported + +//===----------------------------------------------------------------------===// +// Unsupported sdwa variants. +//===----------------------------------------------------------------------===// + +v_movreld_b32_sdwa v0, 64 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_movrels_b32_sdwa v0, 1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_movrelsd_b32_sdwa v0, 1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/gfx9_unsupported.s b/llvm/test/MC/AMDGPU/gfx9_unsupported.s new file mode 100644 index 0000000000000..cf1b2b90c8c5e --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx9_unsupported.s @@ -0,0 +1,1043 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --implicit-check-not=error: %s + +//===----------------------------------------------------------------------===// +// Unsupported instructions. 
+//===----------------------------------------------------------------------===// + +buffer_atomic_add_f32 v255, off, s[8:11], s3 offset:4095 +// CHECK: error: instruction not supported on this GPU + +buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 +// CHECK: error: instruction not supported on this GPU + +buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 +// CHECK: error: instruction not supported on this GPU + +buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc +// CHECK: error: instruction not supported on this GPU + +buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 +// CHECK: error: instruction not supported on this GPU + +buffer_atomic_fmin v0, off, s[0:3], s0 +// CHECK: error: instruction not supported on this GPU + +buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc +// CHECK: error: instruction not supported on this GPU + +buffer_atomic_pk_add_f16 v255, off, s[8:11], s3 offset:4095 +// CHECK: error: instruction not supported on this GPU + +buffer_gl0_inv +// CHECK: error: instruction not supported on this GPU + +buffer_gl1_inv +// CHECK: error: instruction not supported on this GPU + +flat_atomic_fcmpswap v0, v[1:2], v[2:3] glc +// CHECK: error: instruction not supported on this GPU + +flat_atomic_fcmpswap_x2 v[0:1], v[1:2], v[2:5] glc +// CHECK: error: instruction not supported on this GPU + +flat_atomic_fmax v0, v[1:2], v2 glc +// CHECK: error: instruction not supported on this GPU + +flat_atomic_fmax_x2 v[0:1], v[1:2], v[2:3] glc +// CHECK: error: instruction not supported on this GPU + +flat_atomic_fmin v0, v[1:2], v2 glc +// CHECK: error: instruction not supported on this GPU + +flat_atomic_fmin_x2 v[0:1], v[1:2], v[2:3] glc +// CHECK: error: instruction not supported on this GPU + +global_atomic_add_f32 v[1:2], v2, off +// CHECK: error: instruction not supported on this GPU + +global_atomic_pk_add_f16 v[1:2], v2, off +// CHECK: error: instruction not supported on this GPU + +s_and_saveexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_andn1_saveexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_andn1_wrexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_andn2_saveexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_andn2_wrexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_clause 0x0 +// CHECK: error: instruction not supported on this GPU + +s_code_end +// CHECK: error: instruction not supported on this GPU + +s_denorm_mode 0x0 +// CHECK: error: instruction not supported on this GPU + +s_get_waveid_in_workgroup s0 +// CHECK: error: instruction not supported on this GPU + +s_gl1_inv +// CHECK: error: instruction not supported on this GPU + +s_inst_prefetch 0x0 +// CHECK: error: instruction not supported on this GPU + +s_movrelsd_2_b32 s0, s1 +// CHECK: error: instruction not supported on this GPU + +s_nand_saveexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_nor_saveexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_or_saveexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_orn1_saveexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_orn2_saveexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_round_mode 0x0 +// CHECK: error: instruction not supported on this GPU + +s_subvector_loop_begin exec_hi, 0x1234 +// CHECK: error: 
instruction not supported on this GPU + +s_subvector_loop_end exec_hi, 0x1234 +// CHECK: error: instruction not supported on this GPU + +s_ttracedata_imm 0x0 +// CHECK: error: instruction not supported on this GPU + +s_version 0x1234 +// CHECK: error: instruction not supported on this GPU + +s_waitcnt_expcnt exec_hi, 0x1234 +// CHECK: error: instruction not supported on this GPU + +s_waitcnt_lgkmcnt exec_hi, 0x1234 +// CHECK: error: instruction not supported on this GPU + +s_waitcnt_vmcnt exec_hi, 0x1234 +// CHECK: error: instruction not supported on this GPU + +s_waitcnt_vscnt exec_hi, 0x1234 +// CHECK: error: instruction not supported on this GPU + +s_xnor_saveexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +s_xor_saveexec_b32 exec_hi, s1 +// CHECK: error: instruction not supported on this GPU + +v_accvgpr_read_b32 a0, a0 +// CHECK: error: instruction not supported on this GPU + +v_accvgpr_write_b32 a0, 65 +// CHECK: error: instruction not supported on this GPU + +v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_add_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// CHECK: error: instruction not supported on this GPU + +v_add_co_ci_u32_e32 v255, vcc, v1, v2, vcc +// CHECK: error: instruction not supported on this GPU + +v_add_co_ci_u32_e64 v255, s12, v1, v2, s6 +// CHECK: error: instruction not supported on this GPU + +v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_add_nc_i16 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_add_nc_i32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_add_nc_u16 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// CHECK: error: instruction not supported on this GPU + +v_add_nc_u32_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_add_nc_u32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_add_nc_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_addc_u32 v0, vcc, exec_hi, v0, vcc +// CHECK: error: instruction not supported on this GPU + +v_addc_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_addc_u32_e32 v1, -1, v2, v3, s0 +// CHECK: error: instruction not supported on this GPU + +v_addc_u32_e64 v0, s[0:1], s0, s0, s[0:1] +// CHECK: error: instruction not supported on this GPU + +v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +// CHECK: error: instruction not supported on this GPU + +v_ashr_i32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_ashr_i32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_ashr_i64 v[254:255], v[1:2], v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_eq_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_eq_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_eq_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_eq_f64_e64 flat_scratch, v[1:2], v[2:3] +// 
CHECK: error: instruction not supported on this GPU + +v_cmps_f_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_f_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_f_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_f_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_ge_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_ge_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_ge_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_ge_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_gt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_gt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_gt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_gt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_le_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_le_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_le_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_le_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_lg_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_lg_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_lg_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_lg_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_lt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_lt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_lt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_lt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_neq_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_neq_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_neq_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_neq_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nge_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nge_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nge_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nge_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_ngt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_ngt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_ngt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_ngt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nle_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + 
+v_cmps_nle_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nle_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nle_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlg_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlg_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlg_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlg_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_nlt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_o_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_o_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_o_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_o_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_tru_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_tru_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_tru_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_tru_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_u_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_u_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmps_u_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmps_u_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_eq_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_eq_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_eq_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_eq_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_f_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_f_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_f_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_f_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ge_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ge_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ge_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ge_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_gt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_gt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_gt_f64 vcc, -1, v[2:3] +// 
CHECK: error: instruction not supported on this GPU + +v_cmpsx_gt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_le_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_le_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_le_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_le_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lg_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lg_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lg_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lg_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_lt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_neq_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_neq_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_neq_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_neq_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nge_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nge_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nge_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nge_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ngt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ngt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ngt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_ngt_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nle_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nle_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nle_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nle_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlg_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlg_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlg_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlg_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlt_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlt_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlt_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_nlt_f64_e64 flat_scratch, 
v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_o_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_o_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_o_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_o_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_tru_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_tru_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_tru_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_tru_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_u_f32 vcc, -1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_u_f32_e64 flat_scratch, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_u_f64 vcc, -1, v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_cmpsx_u_f64_e64 flat_scratch, v[1:2], v[2:3] +// CHECK: error: instruction not supported on this GPU + +v_dot2_f32_f16 v0, -v1, -v2, -v3 +// CHECK: error: instruction not supported on this GPU + +v_dot2_i32_i16 v0, -v1, -v2, -v3 +// CHECK: error: instruction not supported on this GPU + +v_dot2_u32_u16 v0, -v1, -v2, -v3 +// CHECK: error: instruction not supported on this GPU + +v_dot2c_f32_f16 v0, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_dot2c_f32_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_dot2c_f32_f16_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_dot2c_i32_i16 v0, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_dot2c_i32_i16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_dot4_i32_i8 v0, v1, v2, v3 +// CHECK: error: instruction not supported on this GPU + +v_dot4_u32_u8 v0, v1, v2, v3 +// CHECK: error: instruction not supported on this GPU + +v_dot4c_i32_i8 v0, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_dot4c_i32_i8_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_dot4c_i32_i8_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_dot8_i32_i4 v0, v1, v2, v3 +// CHECK: error: instruction not supported on this GPU + +v_dot8_u32_u4 v0, v1, v2, v3 +// CHECK: error: instruction not supported on this GPU + +v_dot8c_i32_i4 v0, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_dot8c_i32_i4_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_fma_mix_f32 v0, -abs(v1), v2, v3 +// CHECK: error: instruction not supported on this GPU + +v_fma_mixhi_f16 v0, -v1, abs(v2), -abs(v3) +// CHECK: error: instruction not supported on this GPU + +v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3) +// CHECK: error: instruction not supported on this GPU + +v_fmaak_f32 v255, v1, v2, 0x1121 +// CHECK: error: instruction not supported on this GPU + +v_fmac_f16 v5, 0x1234, v2 +// CHECK: error: instruction not supported on this GPU + +v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_fmac_f16_e32 v255, v1, 
v2 +// CHECK: error: instruction not supported on this GPU + +v_fmac_f16_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_fmac_f32 v0, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_fmac_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_fmac_f32_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_fmac_f32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_fmamk_f32 v255, v1, 0x1121, v3 +// CHECK: error: instruction not supported on this GPU + +v_log_clamp_f32 v1, 0.5 +// CHECK: error: instruction not supported on this GPU + +v_log_clamp_f32_e64 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_lshl_b32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_lshl_b32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_lshl_b64 v[254:255], v[1:2], v2 +// CHECK: error: instruction not supported on this GPU + +v_lshr_b32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_lshr_b32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_lshr_b64 v[254:255], v[1:2], v2 +// CHECK: error: instruction not supported on this GPU + +v_mac_legacy_f32 v0, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_mac_legacy_f32_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_mac_legacy_f32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_max_legacy_f32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_max_legacy_f32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_32x32x1f32 a[0:31], 1, v1, a[1:32] +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 +// CHECK: error: instruction not supported on this GPU + +v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 +// CHECK: error: instruction not supported on this GPU + +v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 +// CHECK: error: instruction not supported on this 
GPU + +v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 +// CHECK: error: instruction not supported on this GPU + +v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 +// CHECK: error: instruction not supported on this GPU + +v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 +// CHECK: error: instruction not supported on this GPU + +v_min_legacy_f32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_min_legacy_f32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_movreld_b32 v0, 123 +// CHECK: error: instruction not supported on this GPU + +v_movreld_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_movreld_b32_e32 v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_movreld_b32_e64 v0, flat_scratch_hi +// CHECK: error: instruction not supported on this GPU + +v_movreld_b32_sdwa v0, 64 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_movrels_b32 v0, v2 dpp8:[0,0,0,0,0,0,0,0] +// CHECK: error: instruction not supported on this GPU + +v_movrels_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// CHECK: error: instruction not supported on this GPU + +v_movrels_b32_e32 v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_movrels_b32_e64 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_movrels_b32_sdwa v0, 1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_movrelsd_2_b32 v0, v255 dpp8:[7,6,5,4,3,2,1,0] +// CHECK: error: instruction not supported on this GPU + +v_movrelsd_2_b32_dpp v0, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_movrelsd_2_b32_e32 v5, 1 +// CHECK: error: instruction not supported on this GPU + +v_movrelsd_2_b32_e64 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_movrelsd_2_b32_sdwa v0, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_movrelsd_b32 v0, v2 dpp8:[7,6,5,4,3,2,1,0] +// CHECK: error: instruction not supported on this GPU + +v_movrelsd_b32_dpp v0, v255 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_movrelsd_b32_e32 v1, s2 +// CHECK: error: instruction not supported on this GPU + +v_movrelsd_b32_e64 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_movrelsd_b32_sdwa v0, 1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_mullit_f32 v255, v1, v2, v3 +// CHECK: error: instruction not supported on this GPU + +v_permlane16_b32 v0, lds_direct, s0, s0 +// CHECK: error: instruction not supported on this GPU + +v_permlanex16_b32 v0, lds_direct, s0, s0 +// CHECK: error: instruction not supported on this GPU + +v_pipeflush +// CHECK: error: instruction not supported on this GPU + +v_pipeflush_e64 +// CHECK: error: instruction not supported on this GPU + +v_pk_fmac_f16 v0, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_rcp_clamp_f32 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_rcp_clamp_f32_e64 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_rcp_clamp_f64 v[254:255], v[1:2] +// CHECK: error: instruction not supported on this GPU + +v_rcp_clamp_f64_e64 v[254:255], v[1:2] +// CHECK: error: instruction not supported on this GPU + 
+v_rcp_legacy_f32 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_rcp_legacy_f32_e64 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_rsq_clamp_f32 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_rsq_clamp_f32_e64 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_rsq_clamp_f64 v[254:255], v[1:2] +// CHECK: error: instruction not supported on this GPU + +v_rsq_clamp_f64_e64 v[254:255], v[1:2] +// CHECK: error: instruction not supported on this GPU + +v_rsq_legacy_f32 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_rsq_legacy_f32_e64 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_sub_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// CHECK: error: instruction not supported on this GPU + +v_sub_co_ci_u32_e32 v255, vcc, v1, v2, vcc +// CHECK: error: instruction not supported on this GPU + +v_sub_co_ci_u32_e64 v255, s12, v1, v2, s6 +// CHECK: error: instruction not supported on this GPU + +v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_i16 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_i32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_u16 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_u32_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_u32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_sub_nc_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_subb_u32 v1, s[0:1], v2, v3, vcc +// CHECK: error: instruction not supported on this GPU + +v_subb_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_subb_u32_e64 v255, s[12:13], v1, v2, s[6:7] +// CHECK: error: instruction not supported on this GPU + +v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +// CHECK: error: instruction not supported on this GPU + +v_subbrev_u32 v1, s[0:1], v2, v3, vcc +// CHECK: error: instruction not supported on this GPU + +v_subbrev_u32_dpp v255, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_subbrev_u32_e64 v255, s[12:13], v1, v2, s[6:7] +// CHECK: error: instruction not supported on this GPU + +v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_ci_u32 v0, vcc_lo, src_lds_direct, v0, vcc_lo +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_ci_u32_e32 v1, 0, v1 +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_ci_u32_e64 v255, s12, v1, v2, s6 +// CHECK: error: instruction not supported on this GPU + +v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_subrev_i32 v1, s[0:1], 
v2, v3 +// CHECK: error: instruction not supported on this GPU + +v_subrev_i32_e64 v255, s[12:13], v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_subrev_nc_u32 v0, src_lds_direct, v0 +// CHECK: error: instruction not supported on this GPU + +v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// CHECK: error: instruction not supported on this GPU + +v_subrev_nc_u32_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_subrev_nc_u32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_subrev_nc_u32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_swaprel_b32 v255, v1 +// CHECK: error: instruction not supported on this GPU + +v_xnor_b32 v0, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_xnor_b32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: error: instruction not supported on this GPU + +v_xnor_b32_e32 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_xnor_b32_e64 v255, v1, v2 +// CHECK: error: instruction not supported on this GPU + +v_xnor_b32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: instruction not supported on this GPU + +v_xor3_b32 v255, v1, v2, v3 +// CHECK: error: instruction not supported on this GPU + +//===----------------------------------------------------------------------===// +// Unsupported e32 variants. +//===----------------------------------------------------------------------===// + +v_add_i32_e32 v0, vcc, 0.5, v0 +// CHECK: error: e32 variant of this instruction is not supported + +v_cvt_pkrtz_f16_f32_e32 v255, v1, v2 +// CHECK: error: e32 variant of this instruction is not supported + +//===----------------------------------------------------------------------===// +// Unsupported e64 variants. +//===----------------------------------------------------------------------===// + +v_swap_b32_e64 v1, v2 +// CHECK: error: e64 variant of this instruction is not supported + +//===----------------------------------------------------------------------===// +// Unsupported sdwa variants. +//===----------------------------------------------------------------------===// + +v_mac_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported + +v_mac_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: error: sdwa variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s index 14800de71cbd2..a5cca6ba5bd93 100644 --- a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s +++ b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple amdgcn < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -triple amdgcn < %s 2>&1 | FileCheck --strict-whitespace %s # This tests the mnemonic spell checker. 
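[Editorial aside — not part of the patch. The hunks below exercise the mnemonic spell checker, which suggests known mnemonics within a small edit distance of a misspelled one. The sketch that follows is a minimal, self-contained illustration of that idea only, not the parser's actual code; editDistance, suggestMnemonics, and the MaxDist budget are hypothetical names/values chosen to match what these tests check.]

// Illustrative sketch only -- not LLVM's implementation.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Classic single-row dynamic-programming Levenshtein distance.
static unsigned editDistance(const std::string &A, const std::string &B) {
  std::vector<unsigned> Row(B.size() + 1);
  for (unsigned j = 0; j <= B.size(); ++j)
    Row[j] = j;
  for (unsigned i = 1; i <= A.size(); ++i) {
    unsigned Prev = Row[0]; // dist(i-1, j-1)
    Row[0] = i;
    for (unsigned j = 1; j <= B.size(); ++j) {
      unsigned Cur = Row[j]; // dist(i-1, j), saved before overwrite
      Row[j] = std::min({Row[j] + 1, Row[j - 1] + 1,
                         Prev + (A[i - 1] == B[j - 1] ? 0 : 1)});
      Prev = Cur;
    }
  }
  return Row[B.size()];
}

// Suggest known mnemonics within a small edit-distance budget. When every
// candidate exceeds the budget (the "edit distance is too large" case in the
// test below), no suggestion is offered.
static std::vector<std::string>
suggestMnemonics(const std::string &Bad, const std::vector<std::string> &Known,
                 unsigned MaxDist = 2) {
  std::vector<std::string> Out;
  for (const std::string &K : Known)
    if (editDistance(Bad, K) <= MaxDist)
      Out.push_back(K);
  std::sort(Out.begin(), Out.end());
  return Out;
}

int main() {
  std::vector<std::string> Known = {"s_mov_b32", "s_mov_b64",
                                    "ds_write_src2_b64"};
  for (const std::string &S : suggestMnemonics("s_mov_b", Known))
    std::cout << S << "\n"; // prints s_mov_b32 and s_mov_b64
}

[Consistent with the hunks below: "dsc_write_src2_b64" is one deletion away from ds_write_src2_b64 and yields a single suggestion, while "s_mov_b" is within two insertions of both s_mov_b32 and s_mov_b64 and yields two.]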
@@ -6,9 +6,9 @@ v2, v4, v6 -# CHECK: unknown token in expression -# CHECK-NEXT: v2, v4, v6 -# CHECK-NEXT: ^ +# CHECK: error: invalid instruction +# CHECK-NEXT:{{^}}v2, v4, v6 +# CHECK-NEXT:{{^}}^ # We don't want to see a suggestion here; the edit distance is too large to # give sensible suggestions: @@ -16,29 +16,29 @@ v2, v4, v6 aaaaaaaaaaaaaaa v1, v2, v3 # CHECK: error: invalid instruction -# CHECK-NEXT: aaaaaaaaaaaaaaa v1, v2, v3 -# CHECK-NEXT: ^ +# CHECK-NEXT:{{^}}aaaaaaaaaaaaaaa v1, v2, v3 +# CHECK-NEXT:{{^}}^ # Check that we get one suggestion: 'dsc_write_src2_b64' is 1 edit away, i.e. a deletion. dsc_write_src2_b64 v1, v2, v3 # CHECK: error: invalid instruction, did you mean: ds_write_src2_b64? -# CHECK-NEXT: dsc_write_src2_b64 v1, v2, v3 -# CHECK-NEXT: ^ +# CHECK-NEXT:{{^}}dsc_write_src2_b64 v1, v2, v3 +# CHECK-NEXT:{{^}}^ # Check edit distance 1 and 2, just insertions: s_mov_b v1, v2 # CHECK: error: invalid instruction, did you mean: s_mov_b32, s_mov_b64? -# CHECK-NEXT: s_mov_b v1, v2 -# CHECK-NEXT: ^ +# CHECK-NEXT:{{^}}s_mov_b v1, v2 +# CHECK-NEXT:{{^}}^ # Check an instruction that is 2 edits away, and also has a lot of candidates: s_load_dwordx v1, v2, v3 # CHECK: error: invalid instruction, did you mean: s_load_dword, s_load_dwordx16, s_load_dwordx2, s_load_dwordx4, s_load_dwordx8? -# CHECK-NEXT: s_load_dwordx v1, v2, v3 -# CHECK-NEXT: ^ +# CHECK-NEXT:{{^}}s_load_dwordx v1, v2, v3 +# CHECK-NEXT:{{^}}^ diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index ce6893ed057b9..f639fd9b19fac 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -509,13 +509,19 @@ s_mov_b64 s[0:1], 0x1000000001 // NOGCN: error: invalid operand for instruction s_mov_b64 s[0:1], 0x1000000fff -// NOGCN: error: invalid operand for instruction +// NOGFX89: error: invalid operand for instruction +// NOSI: error: instruction not supported on this GPU +// NOCIVI: error: invalid operand for instruction v_trunc_f64 v[0:1], 0x1fffffffff0 -// NOGCN: error: invalid operand for instruction +// NOGFX89: error: invalid operand for instruction +// NOSI: error: instruction not supported on this GPU +// NOCIVI: error: invalid operand for instruction v_trunc_f64 v[0:1], 0x100000001 -// NOGCN: error: invalid operand for instruction +// NOGFX89: error: invalid operand for instruction +// NOSI: error: instruction not supported on this GPU +// NOCIVI: error: invalid operand for instruction v_trunc_f64 v[0:1], 0x1fffffff000 //---------------------------------------------------------------------------// @@ -554,12 +560,12 @@ s_and_b64 s[0:1], s[0:1], src_scc // GFX89: v_add_u16_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x4c] v_add_u16 v0, vccz, v0 -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_add_u16_sdwa v0, src_scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06] v_add_u16_sdwa v0, scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86] v_add_u16_sdwa v0, v0, scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD @@ -592,20 +598,20 @@ v_max_f64 
v[0:1], scc, v[0:1] // GFX9: v_pk_add_f16 v0, src_execz, v0 ; encoding: [0x00,0x00,0x8f,0xd3,0xfc,0x00,0x02,0x18] v_pk_add_f16 v0, execz, v0 -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // GFX89: v_ceil_f16_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20] v_ceil_f16 v0, neg(vccz) -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // GFX89: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00] v_ceil_f16 v0, abs(scc) -// NOSI: error: not a valid operand +// NOSI: error: instruction not supported on this GPU // CI: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x30,0xd3,0xfc,0x00,0x00,0x00] // GFX89: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00] v_ceil_f64 v[5:6], |execz| -// NOSI: error: not a valid operand +// NOSI: error: instruction not supported on this GPU // CI: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x30,0xd3,0x6a,0x00,0x00,0x20] // GFX89: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20] v_ceil_f64 v[5:6], -vcc @@ -618,22 +624,24 @@ v_ceil_f32 v0, -vccz // GFX89: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00] v_ceil_f32 v0, |execz| -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_ceil_f16_sdwa v5, |src_vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00] v_ceil_f16_sdwa v5, |vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_ceil_f16_sdwa v5, -src_scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00] v_ceil_f16_sdwa v5, -scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE -// NOSICIVI: error: invalid operand for instruction // GFX9: v_ceil_f32_sdwa v5, src_vccz dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00] +// NOSICI: error: sdwa variant of this instruction is not supported +// NOVI: error: invalid operand for instruction v_ceil_f32_sdwa v5, vccz dst_sel:DWORD src0_sel:DWORD -// NOSICIVI: error: invalid operand for instruction // GFX9: v_ceil_f32_sdwa v5, |src_execz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00] +// NOSICI: error: sdwa variant of this instruction is not supported +// NOVI: error: invalid operand for instruction v_ceil_f32_sdwa v5, |execz| dst_sel:DWORD src0_sel:DWORD //---------------------------------------------------------------------------// @@ -648,12 +656,6 @@ buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 // GFX9: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81] s_add_i32 s0, src_shared_base, s0 - - - - - - // NOSICIVI: error: register not available on this GPU // GFX9: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81] s_add_i32 s0, src_shared_limit, s0 @@ -690,32 +692,38 @@ s_and_b64 s[0:1], s[0:1], src_private_limit // GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id -// NOSICIVI: error: register not available on this GPU // GFX9: 
v_add_u16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4c] +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: register not available on this GPU v_add_u16 v0, src_shared_base, v0 -// NOSICIVI: error: register not available on this GPU // GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06] +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: register not available on this GPU v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICIVI: error: register not available on this GPU // GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86] +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: register not available on this GPU v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICIVI: error: register not available on this GPU // GFX9: v_add_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x68] +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: register not available on this GPU v_add_u32 v0, src_shared_base, v0 -// NOSICIVI: error: register not available on this GPU // GFX9: v_add_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00] +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: register not available on this GPU v_add_u32_e64 v0, src_shared_base, v0 // NOSICIVI: error: register not available on this GPU // GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d] v_cmp_eq_i64 vcc, src_shared_base, v[0:1] -// NOSICIVI: error: register not available on this GPU // GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a] +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: register not available on this GPU v_max_f16 v0, src_shared_base, v0 // NOSICIVI: error: register not available on this GPU @@ -726,28 +734,28 @@ v_max_f32 v0, src_shared_base, v0 // GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00] v_max_f64 v[0:1], src_shared_base, v[0:1] -// NOSICIVI: error: register not available on this GPU +// NOSICIVI: error: instruction not supported on this GPU // GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x8f,0xd3,0xeb,0x00,0x02,0x18] v_pk_add_f16 v0, src_shared_base, v0 // GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: register not available on this GPU v_ceil_f16 v0, neg(src_shared_base) // GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: register not available on this GPU v_ceil_f16 v0, abs(src_shared_base) // GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00] -// NOSI: error: not a valid operand. 
+// NOSI: error: instruction not supported on this GPU // NOCIVI: error: register not available on this GPU // NOVI: error: register not available on this GPU v_ceil_f64 v[5:6], |src_shared_base| // GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20] -// NOSI: error: not a valid operand. +// NOSI: error: instruction not supported on this GPU // NOCIVI: error: register not available on this GPU // NOVI: error: register not available on this GPU v_ceil_f64 v[5:6], -src_shared_base @@ -761,29 +769,32 @@ v_ceil_f32 v0, -src_shared_base v_ceil_f32 v0, |src_shared_base| // GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: register not available on this GPU v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE // GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: register not available on this GPU v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE -// NOSICIVI: error: register not available on this GPU // GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00] +// NOSICI: error: sdwa variant of this instruction is not supported +// NOVI: error: register not available on this GPU v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD src0_sel:DWORD -// NOSICIVI: error: register not available on this GPU // GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] +// NOSICI: error: sdwa variant of this instruction is not supported +// NOVI: error: register not available on this GPU v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD //---------------------------------------------------------------------------// // named inline values compete with other scalars for constant bus access //---------------------------------------------------------------------------// -// NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: register not available on this GPU v_add_u32 v0, private_base, s0 // NOSICIVI: error: instruction not supported on this GPU @@ -818,7 +829,7 @@ v_div_fmas_f32 v0, v0, scc, v1 v_div_fmas_f32 v0, v0, v1, vccz // v_addc_co_u32 implicitly reads VCC (VOP2) -// NOSICIVI: error: register not available on this GPU +// NOSICIVI: error: instruction not supported on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_addc_co_u32 v0, vcc, shared_base, v0, vcc @@ -840,7 +851,7 @@ v_cmp_eq_f32 s[0:1], private_base, s0 // NOGCN: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], execz, s0 -// NOSICIVI: error: register not available on this GPU +// NOSICIVI: error: instruction not supported on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_pk_add_f16 v255, private_base, private_limit diff --git a/llvm/test/MC/AMDGPU/mad-mix.s 
b/llvm/test/MC/AMDGPU/mad-mix.s index 0a261a922725d..f1de62b5a5482 100644 --- a/llvm/test/MC/AMDGPU/mad-mix.s +++ b/llvm/test/MC/AMDGPU/mad-mix.s @@ -20,57 +20,57 @@ v_mad_mixhi_f16 v0, v1, v2, v3 v_mad_mix_f32 v0, abs(v1), v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU // FIXME: Improve diagnostics v_mad_mix_f32 v0, v1, abs(v2), v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, abs(v3) // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, -v1, v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, -v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, -v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, -abs(v1), v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, -abs(v2), v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, -abs(v3) // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3) // GFX9-MADMIX: v_mad_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3) // GFX9-MADMIX: v_mad_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4] -// GFX9-FMAMIX-ERR: error: not a valid operand.
+// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mixlo_f16 v0, v1, v2, v3 clamp // GFX9-MADMIX: v_mad_mixlo_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa1,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: invalid operand for instruction +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mixhi_f16 v0, v1, v2, v3 clamp // GFX9-MADMIX: v_mad_mixhi_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa2,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: invalid operand for instruction +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU // // op_sel with non-packed instructions // @@ -78,25 +78,25 @@ v_mad_mixhi_f16 v0, v1, v2, v3 clamp v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU // FIXME: Improve diagnostics v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x10,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x00,0x38,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] @@ -104,24 +104,24 @@ v_mad_mix_f32 v0, v1, v2, v3 v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x0c] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x14] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x1c] -// GFX9-FMAMIX-ERR: error: not a valid operand.
+// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-MADMIX: v_mad_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa1,0xd3,0x01,0x05,0x0e,0x0c] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-MADMIX: v_mad_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa2,0xd3,0x01,0x05,0x0e,0x0c] -// GFX9-FMAMIX-ERR: error: not a valid operand. +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/mai-err.s b/llvm/test/MC/AMDGPU/mai-err.s index 6f3361c0c9f3d..32a7d8e186c88 100644 --- a/llvm/test/MC/AMDGPU/mai-err.s +++ b/llvm/test/MC/AMDGPU/mai-err.s @@ -3,61 +3,61 @@ v_accvgpr_read_b32 v0, v0 // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_accvgpr_read_b32 a0, a0 // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_accvgpr_read_b32 v0, 1 // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_accvgpr_read_b32 v0, s0 // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_accvgpr_read_b32 v0, a0 // GFX900: error: instruction not supported on this GPU v_accvgpr_write_b32 v0, v0 // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_accvgpr_write_b32 a0, a0 // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_accvgpr_write_b32 a0, s0 // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_accvgpr_write_b32 a0, 65 // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_accvgpr_write_b32 a0, v0 // GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 v[0:31], v0, v1, a[1:32] // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], v0, v1, v[1:32] // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], s0, v1, a[1:32] // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], 1, v1, a[1:32] // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], v0, v1, 65 // GFX908: error: invalid operand for instruction -// GFX900: error: invalid operand for instruction +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], 
v0, v1, 0 // GFX900: error: instruction not supported on this GPU @@ -69,7 +69,7 @@ v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 // GFX908: error: invalid literal operand @@ -77,7 +77,7 @@ v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 // GFX908: error: invalid literal operand @@ -85,7 +85,7 @@ v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 // GFX908: error: invalid literal operand @@ -93,7 +93,7 @@ v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand @@ -101,7 +101,7 @@ v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand @@ -109,7 +109,7 @@ v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand @@ -117,7 +117,7 @@ v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand @@ -125,7 +125,7 @@ v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand @@ -133,7 +133,7 @@ v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand @@ -141,7 +141,7 @@ v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. 
+// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand @@ -149,7 +149,7 @@ v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand @@ -157,7 +157,7 @@ v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand @@ -165,7 +165,7 @@ v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand @@ -173,7 +173,7 @@ v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand @@ -181,7 +181,7 @@ v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand @@ -189,7 +189,7 @@ v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand @@ -197,7 +197,7 @@ v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand @@ -205,7 +205,7 @@ v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand @@ -213,7 +213,7 @@ v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand @@ -221,7 +221,7 @@ v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. 
+// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand @@ -229,7 +229,7 @@ v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand @@ -237,7 +237,7 @@ v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand @@ -245,7 +245,7 @@ v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand @@ -253,7 +253,7 @@ v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand @@ -261,7 +261,7 @@ v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand @@ -269,7 +269,7 @@ v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand @@ -277,7 +277,7 @@ v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand @@ -285,7 +285,7 @@ v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand @@ -293,7 +293,7 @@ v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. 
+// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand @@ -301,7 +301,7 @@ v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand @@ -309,7 +309,7 @@ v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand @@ -317,7 +317,7 @@ v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand @@ -325,7 +325,7 @@ v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand @@ -333,7 +333,7 @@ v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand @@ -341,7 +341,7 @@ v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand @@ -349,7 +349,7 @@ v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand @@ -357,7 +357,7 @@ v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand @@ -365,7 +365,7 @@ v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. 
+// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand @@ -373,7 +373,7 @@ v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand @@ -381,7 +381,7 @@ v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 // GFX908: error: invalid literal operand @@ -389,7 +389,7 @@ v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 // GFX908: error: invalid literal operand @@ -397,7 +397,7 @@ v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 // GFX908: error: invalid literal operand @@ -405,7 +405,7 @@ v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 // GFX908: error: invalid literal operand @@ -413,7 +413,7 @@ v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 // GFX908: error: invalid literal operand @@ -421,7 +421,7 @@ v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 // GFX908: error: invalid literal operand @@ -429,7 +429,7 @@ v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 // GFX908: error: invalid literal operand @@ -437,7 +437,7 @@ v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 // GFX908: error: invalid literal operand @@ -445,7 +445,7 @@ v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. 
+// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 // GFX908: error: invalid literal operand @@ -453,7 +453,7 @@ v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 // GFX908: error: invalid literal operand @@ -461,7 +461,7 @@ v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 // GFX908: error: invalid literal operand @@ -469,7 +469,7 @@ v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 // GFX908: error: invalid literal operand @@ -477,7 +477,7 @@ v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 // GFX908: error: invalid literal operand @@ -485,7 +485,7 @@ v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 // GFX908: error: invalid literal operand @@ -493,7 +493,7 @@ v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 // GFX908: error: invalid literal operand @@ -501,7 +501,7 @@ v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 // GFX908: error: invalid literal operand @@ -509,7 +509,7 @@ v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 // GFX908: error: invalid literal operand @@ -517,7 +517,7 @@ v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 // GFX908: error: invalid literal operand @@ -525,7 +525,7 @@ v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. 
+// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 // GFX908: error: invalid literal operand @@ -533,7 +533,7 @@ v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 // GFX908: error: invalid literal operand @@ -545,7 +545,7 @@ v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 // GFX908: error: invalid literal operand @@ -553,7 +553,7 @@ v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 // GFX908: error: invalid literal operand @@ -561,7 +561,7 @@ v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 // GFX908: error: invalid literal operand @@ -569,7 +569,7 @@ v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand @@ -577,7 +577,7 @@ v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand @@ -585,7 +585,7 @@ v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand @@ -593,7 +593,7 @@ v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand @@ -601,7 +601,7 @@ v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand @@ -609,7 +609,7 @@ v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. 
+// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand @@ -617,7 +617,7 @@ v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand @@ -625,7 +625,7 @@ v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand @@ -633,7 +633,7 @@ v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand @@ -641,7 +641,7 @@ v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand @@ -649,7 +649,7 @@ v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand @@ -657,7 +657,7 @@ v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand @@ -665,7 +665,7 @@ v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand @@ -673,7 +673,7 @@ v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand @@ -681,7 +681,7 @@ v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand @@ -689,7 +689,7 @@ v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. 
+// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand @@ -697,4 +697,4 @@ v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand -// GFX900: error: not a valid operand. +// GFX900: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/mubuf-gfx9.s b/llvm/test/MC/AMDGPU/mubuf-gfx9.s index 10909c63aff7a..840c372b4aa5e 100644 --- a/llvm/test/MC/AMDGPU/mubuf-gfx9.s +++ b/llvm/test/MC/AMDGPU/mubuf-gfx9.s @@ -39,23 +39,23 @@ buffer_load_format_d16_hi_x v5, off, s[8:11], s3 buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 // GFX9: buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 idxen offset:4095 // GFX9: buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 offen offset:4095 // GFX9: buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 glc // GFX9: buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 slc // GFX9: buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x9a,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU buffer_store_format_d16_hi_x v255, off, s[12:15], s4 // GFX9: buffer_store_format_d16_hi_x v255, off, s[12:15], s4 ; encoding: [0x00,0x00,0x9c,0xe0,0x00,0xff,0x03,0x04] @@ -63,20 +63,20 @@ buffer_store_format_d16_hi_x v255, off, s[12:15], s4 buffer_store_format_d16_hi_x v255, off, s[12:15], s4 offset:4095 // GFX9: buffer_store_format_d16_hi_x v255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe0,0x00,0xff,0x03,0x04] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 idxen offset:4095 // GFX9: buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x9c,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 offen offset:4095 // GFX9: buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x9c,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 glc // GFX9: buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x9c,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error: not a valid operand. 
+// VI-ERR: error: instruction not supported on this GPU buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 slc // GFX9: buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x9e,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error: not a valid operand. +// VI-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/mubuf.s b/llvm/test/MC/AMDGPU/mubuf.s index a07a0a2aab180..ad0b9396753c1 100644 --- a/llvm/test/MC/AMDGPU/mubuf.s +++ b/llvm/test/MC/AMDGPU/mubuf.s @@ -721,43 +721,43 @@ buffer_atomic_add v5, off, s[8:11], 0.15915494 offset:4095 glc buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 // SICI: buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0x00,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fcmpswap v[0:1], v[0:1], s[0:3], s0 addr64 offset:4095 // SICI: buffer_atomic_fcmpswap v[0:1], v[0:1], s[0:3], s0 addr64 offset:4095 ; encoding: [0xff,0x8f,0xf8,0xe0,0x00,0x00,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 // SICI: buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe1,0x00,0x00,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095 // SICI: buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x78,0xe1,0x00,0x00,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095 // SICI: buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x01,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fmax v0, off, s[0:3], s0 offset:7 // SICI: buffer_atomic_fmax v0, off, s[0:3], s0 offset:7 ; encoding: [0x07,0x00,0x00,0xe1,0x00,0x00,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc // SICI: buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe1,0x00,0x00,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095 // SICI: buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x05,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 // SICI: buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe1,0x00,0x00,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fmin v0, v[0:1], s[0:3], s0 addr64 offset:4095 // SICI: buffer_atomic_fmin v0, v[0:1], s[0:3], s0 addr64 offset:4095 ; encoding: [0xff,0x8f,0xfc,0xe0,0x00,0x00,0x00,0x00] -// NOVI: error: not a valid operand. 
+// NOVI: error: instruction not supported on this GPU buffer_atomic_fmin v0, off, s[0:3], s0 // SICI: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] @@ -765,15 +765,15 @@ buffer_atomic_fmin v0, off, s[0:3], s0 buffer_atomic_fmin v0, off, s[0:3], s0 offset:0 // SICI: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc // SICI: buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc ; encoding: [0xff,0x0f,0x7c,0xe1,0x00,0x00,0x40,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 // SICI: buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x7c,0xe1,0x00,0x00,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// // Lds support @@ -836,11 +836,11 @@ buffer_store_lds_dword s[4:7], s0 lds // VI: buffer_store_lds_dword s[4:7], s0 lds ; encoding: [0x00,0x00,0xf5,0xe0,0x00,0x00,0x01,0x00] buffer_store_lds_dword s[4:7], s0 offset:4095 lds -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI: buffer_store_lds_dword s[4:7], s0 offset:4095 lds ; encoding: [0xff,0x0f,0xf5,0xe0,0x00,0x00,0x01,0x00] buffer_store_lds_dword s[4:7], s8 offset:4 lds glc slc -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI: buffer_store_lds_dword s[4:7], s8 offset:4 lds glc slc ; encoding: [0x04,0x40,0xf7,0xe0,0x00,0x00,0x01,0x08] buffer_load_dwordx2 v[1:2], off, s[4:7], s1 lds @@ -866,9 +866,9 @@ buffer_load_dword v5, off, s[8:11], s3 tfe lds // NOSICIVI: error: invalid operand for instruction buffer_store_lds_dword s[4:7], s8 offset:4 lds tfe -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction buffer_store_lds_dword s[4:7], s8 offset:4 tfe lds -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/out-of-range-registers.s b/llvm/test/MC/AMDGPU/out-of-range-registers.s index e350fc5de5207..6ca592d8083ff 100644 --- a/llvm/test/MC/AMDGPU/out-of-range-registers.s +++ b/llvm/test/MC/AMDGPU/out-of-range-registers.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,SI-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,CIVI9-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,CIVI9-ERR,VI-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX9-ERR,SICIVI9-ERR,CIVI9-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX10-ERR --implicit-check-not=error: %s @@ -20,10 +20,16 @@ s_add_i32 s105, s0, s1 // GFX10: s_add_i32 s105, s0, s1 ; encoding: v_add_i32 v256, v0, v1 -// GCN-ERR: error: register index is out of range +// GFX10-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: register index is out of range +// SI-ERR: error: register index is out of range +// VI-ERR: error: instruction not supported on this GPU v_add_i32 v257, v0, v1 -// GCN-ERR: error: register index is out of range +// GFX10-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: register index is out of range +// SI-ERR: error: register index is out of range +// VI-ERR: error: instruction not supported on this GPU s_mov_b64 s[0:17], -1 // GCN-ERR: error: invalid or unsupported register size diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s index 5f00a820ee023..cbdcbc99cda9c 100644 --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -10,7 +10,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI -check-prefix=NOSICIVIGFX10 -check-prefix=NOSICIVIGFX1030 --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 -check-prefix=NOGFX9GFX1012 --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX10 -check-prefix=NOGFX9 -check-prefix=NOGFX9GFX1012 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX1030 -check-prefix=NOSICIVIGFX10 -check-prefix=NOSICIVIGFX1030 -check-prefix=NOSICIGFX10 -check-prefix=NOGFX9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX1030 -check-prefix=NOSICIVIGFX10 -check-prefix=NOSICIVIGFX1030 -check-prefix=NOSICIGFX10 -check-prefix=NOGFX9 -check-prefix=NOGFX1030 --implicit-check-not=error: %s s_dcache_wb // GFX89: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] @@ -71,7 +71,7 @@ s_store_dword s1, s[2:3], 0xfc s_store_dword s1, s[2:3], 0xfc glc // GFX89: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x43,0xc0,0xfc,0x00,0x00,0x00] // GFX1012: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x41,0xf4,0xfc,0x00,0x00,0xfa] -// NOSICIGFX1030: error: invalid operand for instruction +// NOSICIGFX1030: error: instruction not supported on 
this GPU s_store_dword s1, s[2:3], s4 // GFX89: s_store_dword s1, s[2:3], s4 ; encoding: [0x41,0x00,0x40,0xc0,0x04,0x00,0x00,0x00] @@ -81,27 +81,31 @@ s_store_dword s1, s[2:3], s4 s_store_dword s1, s[2:3], s4 glc // GFX89: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xc0,0x04,0x00,0x00,0x00] // GFX1012: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xf4,0x00,0x00,0x00,0x08] -// NOSICIGFX1030: error: invalid operand for instruction +// NOSICIGFX1030: error: instruction not supported on this GPU s_store_dword tba_lo, s[2:3], s4 // VI: s_store_dword tba_lo, s[2:3], s4 ; encoding: [0x01,0x1b,0x40,0xc0,0x04,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX1012: error: register not available on this GPU +// NOGFX1030: error: instruction not supported on this GPU s_store_dword tba_hi, s[2:3], s4 // VI: s_store_dword tba_hi, s[2:3], s4 ; encoding: [0x41,0x1b,0x40,0xc0,0x04,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX1012: error: register not available on this GPU +// NOGFX1030: error: instruction not supported on this GPU s_store_dword tma_lo, s[2:3], s4 // VI: s_store_dword tma_lo, s[2:3], s4 ; encoding: [0x81,0x1b,0x40,0xc0,0x04,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX1012: error: register not available on this GPU +// NOGFX1030: error: instruction not supported on this GPU s_store_dword tma_hi, s[2:3], s4 // VI: s_store_dword tma_hi, s[2:3], s4 ; encoding: [0xc1,0x1b,0x40,0xc0,0x04,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX1012: error: register not available on this GPU +// NOGFX1030: error: instruction not supported on this GPU // FIXME: Should error on SI instead of silently ignoring glc s_load_dword s1, s[2:3], 0xfc glc @@ -112,6 +116,7 @@ s_load_dword s1, s[2:3], 0xfc glc s_load_dword s1, s[2:3], s4 glc // GFX89: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xc0,0x04,0x00,0x00,0x00] // GFX10: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xf4,0x00,0x00,0x00,0x08] +// SICI: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x04,0x82,0x00,0xc0] s_buffer_store_dword s10, s[92:95], m0 // GFX89: s_buffer_store_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x60,0xc0,0x7c,0x00,0x00,0x00] @@ -121,22 +126,26 @@ s_buffer_store_dword s10, s[92:95], m0 s_buffer_store_dword tba_lo, s[92:95], m0 // VI: s_buffer_store_dword tba_lo, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX1012: error: register not available on this GPU +// NOGFX1030: error: instruction not supported on this GPU s_buffer_store_dword tba_hi, s[92:95], m0 // VI: s_buffer_store_dword tba_hi, s[92:95], m0 ; encoding: [0x6e,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX1012: error: register not available on this GPU +// NOGFX1030: error: instruction not supported on this GPU s_buffer_store_dword tma_lo, s[92:95], m0 // VI: s_buffer_store_dword tma_lo, s[92:95], m0 ; encoding: [0xae,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not 
supported on this GPU -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX1012: error: register not available on this GPU +// NOGFX1030: error: instruction not supported on this GPU s_buffer_store_dword tma_hi, s[92:95], m0 // VI: s_buffer_store_dword tma_hi, s[92:95], m0 ; encoding: [0xee,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX1012: error: register not available on this GPU +// NOGFX1030: error: instruction not supported on this GPU s_buffer_store_dword ttmp0, s[92:95], m0 // VI: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x60,0xc0,0x7c,0x00,0x00,0x00] @@ -151,13 +160,14 @@ s_buffer_store_dwordx2 s[10:11], s[92:95], m0 s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc // GFX89: s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x69,0xc0,0x7c,0x00,0x00,0x00] -// NOSICIGFX1030: error: invalid operand for instruction +// NOSICIGFX1030: error: instruction not supported on this GPU // GFX1012: s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x69,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dwordx2 tba, s[92:95], m0 glc // VI: s_buffer_store_dwordx2 tba, s[92:95], m0 glc ; encoding: [0x2e,0x1b,0x65,0xc0,0x7c,0x00,0x00,0x00] -// NOSICI: error: invalid operand for instruction -// NOGFX9: error: register not available on this GPU +// NOSICI: error: instruction not supported on this GPU +// NOGFX9GFX1012: error: register not available on this GPU +// NOGFX1030: error: instruction not supported on this GPU s_buffer_load_dword s10, s[92:95], m0 // GFX89: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x20,0xc0,0x7c,0x00,0x00,0x00] @@ -215,6 +225,7 @@ s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc // GFX89: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xc0,0x7c,0x00,0x00,0x00] // GFX10: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xf4,0x00,0x00,0x00,0xf8] +// SICI: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x7c,0x5c,0x84,0xc2] //===----------------------------------------------------------------------===// // s_scratch instructions @@ -228,7 +239,7 @@ s_scratch_load_dword s5, s[2:3], s101 s_scratch_load_dword s5, s[2:3], s0 glc // GFX9: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVIGFX1030: error: invalid operand for instruction +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_scratch_load_dwordx2 s[100:101], s[2:3], s0 // GFX9: s_scratch_load_dwordx2 s[100:101], s[2:3], s0 ; encoding: [0x01,0x19,0x18,0xc0,0x00,0x00,0x00,0x00] @@ -238,7 +249,7 @@ s_scratch_load_dwordx2 s[100:101], s[2:3], s0 s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc // GFX9: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x1b,0xc0,0x01,0x00,0x00,0x00] // GFX1012: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x19,0xf4,0x01,0x00,0x00,0xfa] -// NOSICIVIGFX1030: error: invalid operand for instruction +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_scratch_load_dwordx4 s[20:23], s[4:5], s0 // GFX9: s_scratch_load_dwordx4 s[20:23], s[4:5], s0 ; encoding: [0x02,0x05,0x1c,0xc0,0x00,0x00,0x00,0x00] @@ -253,17 +264,17 @@ s_scratch_store_dword s101, 
s[4:5], s0 s_scratch_store_dword s1, s[4:5], 0x123 glc // GFX9: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x57,0xc0,0x23,0x01,0x00,0x00] // GFX1012: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa] -// NOSICIVIGFX1030: error: invalid operand for instruction +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc // GFX9: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xc0,0x65,0x00,0x00,0x00] // GFX1012: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca] -// NOSICIVIGFX1030: error: invalid operand for instruction +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc // GFX9: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVIGFX1030: error: invalid operand for instruction +// NOSICIVIGFX1030: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// // s_dcache_discard instructions diff --git a/llvm/test/MC/AMDGPU/sop1.s b/llvm/test/MC/AMDGPU/sop1.s index 3b0bafd4ae2c2..494daab380f24 100644 --- a/llvm/test/MC/AMDGPU/sop1.s +++ b/llvm/test/MC/AMDGPU/sop1.s @@ -306,12 +306,12 @@ s_cbranch_join s4 s_cbranch_join 1 // NOSICI: error: invalid operand for instruction // NOGFX89: error: invalid operand for instruction -// GFX10-ERR: error: invalid operand for instruction +// GFX10-ERR: error: instruction not supported on this GPU s_cbranch_join 100 // NOSICI: error: invalid operand for instruction // NOGFX89: error: invalid operand for instruction -// GFX10-ERR: error: invalid operand for instruction +// GFX10-ERR: error: instruction not supported on this GPU s_abs_i32 s1, s2 // SICI: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe] diff --git a/llvm/test/MC/AMDGPU/sopc.s b/llvm/test/MC/AMDGPU/sopc.s index 3ef217798a2ef..fd52b6daed4e0 100644 --- a/llvm/test/MC/AMDGPU/sopc.s +++ b/llvm/test/MC/AMDGPU/sopc.s @@ -76,51 +76,51 @@ s_cmp_lg_u64 s[0:1], s[2:3] gpr_idx = 1 s_set_gpr_idx_on s0, gpr_idx // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; encoding: [0x00,0x01,0x11,0xbf] -// NOSICI: error: invalid operand for instruction -// GFX10-ERR: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU gpr_idx_mode = 10 s_set_gpr_idx_on s0, gpr_idx_mode + 5 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x00,0x0f,0x11,0xbf] -// NOSICI: error: invalid operand for instruction -// GFX10-ERR: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU s_set_gpr_idx_on s0, 0 // VI: s_set_gpr_idx_on s0, gpr_idx() ; encoding: [0x00,0x00,0x11,0xbf] -// NOSICI: error: invalid operand for instruction -// GFX10-ERR: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU s_set_gpr_idx_on s0, gpr_idx() // VI: s_set_gpr_idx_on s0, gpr_idx() ; encoding: [0x00,0x00,0x11,0xbf] -// NOSICI: error: unknown token in expression -// GFX10-ERR: error: unknown token in expression +// NOSICI: error: instruction not 
supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU s_set_gpr_idx_on s0, 1 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; encoding: [0x00,0x01,0x11,0xbf] -// NOSICI: error: invalid operand for instruction -// GFX10-ERR: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU s_set_gpr_idx_on s0, gpr_idx(SRC0) // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; encoding: [0x00,0x01,0x11,0xbf] -// NOSICI: error: invalid operand for instruction -// GFX10-ERR: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU s_set_gpr_idx_on s0, 3 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1) ; encoding: [0x00,0x03,0x11,0xbf] -// NOSICI: error: invalid operand for instruction -// GFX10-ERR: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU s_set_gpr_idx_on s0, gpr_idx(SRC1,SRC0) // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1) ; encoding: [0x00,0x03,0x11,0xbf] -// NOSICI: error: expected ')' in parentheses expression -// GFX10-ERR: error: expected ')' in parentheses expression +// NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU s_set_gpr_idx_on s0, 15 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x00,0x0f,0x11,0xbf] -// NOSICI: error: invalid operand for instruction -// GFX10-ERR: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU s_set_gpr_idx_on s0, gpr_idx(SRC0,DST,SRC2,SRC1) // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x00,0x0f,0x11,0xbf] -// NOSICI: error: expected ')' in parentheses expression -// GFX10-ERR: error: expected ')' in parentheses expression +// NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/sopk.s b/llvm/test/MC/AMDGPU/sopk.s index 14523dcec8567..f8c1d0ea89a4a 100644 --- a/llvm/test/MC/AMDGPU/sopk.s +++ b/llvm/test/MC/AMDGPU/sopk.s @@ -360,7 +360,7 @@ s_endpgm_ordered_ps_done s_call_b64 null, 12609 // GFX10: s_call_b64 null, 12609 ; encoding: [0x41,0x31,0x7d,0xbb] -// NOSICIVI: error: 'null' operand is not supported on this GPU +// NOSICIVI: error: instruction not supported on this GPU // NOGFX9: error: 'null' operand is not supported on this GPU s_call_b64 s[12:13], 12609 diff --git a/llvm/test/MC/AMDGPU/sopp.s b/llvm/test/MC/AMDGPU/sopp.s index 63783f61c6bf1..05f463872039a 100644 --- a/llvm/test/MC/AMDGPU/sopp.s +++ b/llvm/test/MC/AMDGPU/sopp.s @@ -361,19 +361,19 @@ s_set_gpr_idx_off s_set_gpr_idx_mode 0 // VI: s_set_gpr_idx_mode gpr_idx() ; encoding: [0x00,0x00,0x9d,0xbf] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU s_set_gpr_idx_mode gpr_idx() // VI: s_set_gpr_idx_mode gpr_idx() ; encoding: [0x00,0x00,0x9d,0xbf] -// NOSICI: error: unknown token in expression +// NOSICI: error: instruction not supported on this GPU s_set_gpr_idx_mode 15 // VI: s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x0f,0x00,0x9d,0xbf] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU s_set_gpr_idx_mode gpr_idx(SRC2,SRC1,SRC0,DST) // VI: 
s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x0f,0x00,0x9d,0xbf] -// NOSICI: error: expected ')' in parentheses expression +// NOSICI: error: instruction not supported on this GPU s_endpgm_saved // VI: s_endpgm_saved ; encoding: [0x00,0x00,0x9b,0xbf] diff --git a/llvm/test/MC/AMDGPU/vop1-gfx9-err.s b/llvm/test/MC/AMDGPU/vop1-gfx9-err.s index 9345632855379..42feac2f0aa21 100644 --- a/llvm/test/MC/AMDGPU/vop1-gfx9-err.s +++ b/llvm/test/MC/AMDGPU/vop1-gfx9-err.s @@ -3,25 +3,35 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefixes=GCN,CI --implicit-check-not=error: %s v_swap_b32 v1, 1 -// GCN: :16: error: invalid operand for instruction +// CI: error: instruction not supported on this GPU +// GFX9: error: invalid operand for instruction +// VI: error: instruction not supported on this GPU v_swap_b32 v1, s0 -// GCN: :16: error: invalid operand for instruction +// CI: error: instruction not supported on this GPU +// GFX9: error: invalid operand for instruction +// VI: error: instruction not supported on this GPU // FIXME: Better error for it requiring VOP1 encoding v_swap_b32_e64 v1, v2 -// GFX9: :1: error: invalid instruction, did you mean: v_swap_b32? -// CI: :1: error: invalid instruction -// VI: :1: error: invalid instruction +// GFX9: :1: error: e64 variant of this instruction is not supported +// CI: :1: error: instruction not supported on this GPU +// VI: :1: error: instruction not supported on this GPU v_swap_b32 v1, v2, v1 -// GCN: :20: error: invalid operand for instruction +// CI: error: instruction not supported on this GPU +// GFX9: error: invalid operand for instruction +// VI: error: instruction not supported on this GPU v_swap_b32 v1, v2, v2 -// GCN: :20: error: invalid operand for instruction +// CI: error: instruction not supported on this GPU +// GFX9: error: invalid operand for instruction +// VI: error: instruction not supported on this GPU v_swap_b32 v1, v2, v2, v2 -// GCN: :20: error: invalid operand for instruction +// CI: error: instruction not supported on this GPU +// GFX9: error: invalid operand for instruction +// VI: error: instruction not supported on this GPU v_swap_codegen_pseudo_b32 v1, v2 // GCN: :1: error: invalid instruction diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s index b2893154dd6dd..db93478476b80 100644 --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=VI // RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: @@ -16,22 +16,27 @@ // _e32 suffix // SICI: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06] +// VI: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x02] v_add_f32_e32 v1, v2, v3 // src0 inline immediate // SICI: v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x06] +// VI: 
v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x02] v_add_f32 v1, 1.0, v3 // src0 negative inline immediate // SICI: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x06] +// VI: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x02] v_add_f32 v1, -1.0, v3 // src0 literal // SICI: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0x42] +// VI: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x02,0x00,0x00,0xc8,0x42] v_add_f32 v1, 100.0, v3 // src0 negative literal // SICI: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0xc2] +// VI: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x02,0x00,0x00,0xc8,0xc2] v_add_f32 v1, -100.0, v3 //===----------------------------------------------------------------------===// @@ -40,34 +45,42 @@ v_add_f32 v1, -100.0, v3 // _e32 suffix // SICI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] +// VI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c] v_mul_i32_i24_e32 v1, v2, v3 // _e64 suffix // SICI: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x02,0x00] +// VI: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x06,0xd1,0x02,0x07,0x02,0x00] v_mul_i32_i24_e64 v1, v2, v3 // src0 inline // SICI: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12] +// VI: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x0c] v_mul_i32_i24_e32 v1, 3, v3 // src0 negative inline // SICI: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12] +// VI: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x0c] v_mul_i32_i24_e32 v1, -3, v3 // src1 inline // SICI: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00] +// VI: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x06,0xd1,0x02,0x07,0x01,0x00] v_mul_i32_i24_e64 v1, v2, 3 // src1 negative inline // SICI: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00] +// VI: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x06,0xd1,0x02,0x87,0x01,0x00] v_mul_i32_i24_e64 v1, v2, -3 // src0 literal // SICI: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00] +// VI: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x0c,0x64,0x00,0x00,0x00] v_mul_i32_i24_e32 v1, 100, v3 // src1 negative literal // SICI: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff] +// VI: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x0c,0x9c,0xff,0xff,0xff] v_mul_i32_i24_e32 v1, -100, v3 //===----------------------------------------------------------------------===// @@ -76,22 +89,27 @@ v_mul_i32_i24_e32 v1, -100, v3 // src0 sgpr // SICI: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x12] +// VI: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x0c] v_mul_i32_i24_e32 v1, s2, v3 // src1 sgpr // SICI: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00] +// VI: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x06,0xd1,0x02,0x07,0x00,0x00] v_mul_i32_i24_e64 v1, v2, s3 // src0, src1 same sgpr // SICI: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00] +// VI: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x06,0xd1,0x02,0x04,0x00,0x00] v_mul_i32_i24_e64 v1, s2, s2 // src0 sgpr, src1 inline // SICI: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00] +// VI: v_mul_i32_i24_e64 v1, s2, 3 ; 
encoding: [0x01,0x00,0x06,0xd1,0x02,0x06,0x01,0x00] v_mul_i32_i24_e64 v1, s2, 3 // src0 inline src1 sgpr // SICI: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00] +// VI: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x06,0xd1,0x83,0x06,0x00,0x00] v_mul_i32_i24_e64 v1, 3, s3 // SICI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x4a] @@ -142,7 +160,6 @@ v_subrev_f32 v1, v2, v3 // SICI: v_mac_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c] // NOVI: error: instruction not supported on this GPU -// NOVI: v_mac_legacy_f32 v1, v2, v3 v_mac_legacy_f32 v1, v2, v3 // SICI: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e] @@ -171,12 +188,10 @@ v_mul_hi_u32_u24_e32 v1, v2, v3 // SICI: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a] // NOVI: error: instruction not supported on this GPU -// NOVI: v_min_legacy_f32_e32 v1, v2, v3 v_min_legacy_f32_e32 v1, v2, v3 // SICI: v_max_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c] // NOVI: error: instruction not supported on this GPU -// NOVI: v_max_legacy_f32 v1, v2, v3 v_max_legacy_f32 v1, v2, v3 // SICI: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e] @@ -205,7 +220,6 @@ v_max_u32_e32 v1, v2, v3 // SICI: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a] // NOVI: error: instruction not supported on this GPU -// NOVI: v_lshr_b32_e32 v1, v2, v3 v_lshr_b32_e32 v1, v2, v3 // SICI: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c] @@ -214,7 +228,6 @@ v_lshrrev_b32_e32 v1, v2, v3 // SICI: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e] // NOVI: error: instruction not supported on this GPU -// NOVI: v_ashr_i32_e32 v1, v2, v3 v_ashr_i32_e32 v1, v2, v3 // SICI: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30] @@ -223,7 +236,6 @@ v_ashrrev_i32_e32 v1, v2, v3 // SICI: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] // NOVI: error: instruction not supported on this GPU -// NOVI: v_lshl_b32_e32 v1, v2, v3 v_lshl_b32_e32 v1, v2, v3 // SICI: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34] @@ -335,27 +347,27 @@ v_addc_u32 v1, vcc, v2, v3, vcc v_addc_u32_e32 v1, vcc, v2, v3, vcc -// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0xaa,0x01] +// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0xaa,0x01] // VI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0xaa,0x01] v_addc_u32 v1, s[0:1], v2, v3, vcc -// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00] +// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00] // VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00] v_addc_u32 v1, s[0:1], v2, v3, s[2:3] -// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00] +// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00] // VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00] v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] -// SI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0xaa,0x01] +// SICI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0xaa,0x01] // VI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: 
[0x01,0x6a,0x1c,0xd1,0x02,0x07,0xaa,0x01] v_addc_u32_e64 v1, vcc, v2, v3, vcc -// SI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x52] +// SICI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x52] // VI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3a] v_subb_u32 v1, vcc, v2, v3, vcc -// SI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0xaa,0x01] +// SICI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0xaa,0x01] // VI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0xaa,0x01] v_subb_u32 v1, s[0:1], v2, v3, vcc @@ -396,121 +408,97 @@ v_cvt_pk_u16_u32_e64 v1, v2, v3 v_cvt_pk_i16_i32_e64 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_add_f16_e32 v1, v2, v3 // VI: v_add_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e] v_add_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_sub_f16_e32 v1, v2, v3 // VI: v_sub_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x40] v_sub_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_subrev_f16_e32 v1, v2, v3 // VI: v_subrev_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x42] v_subrev_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_mul_f16_e32 v1, v2, v3 // VI: v_mul_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] v_mul_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_mac_f16_e32 v1, v2, v3 // VI: v_mac_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46] v_mac_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_madmk_f16 v1, v2, 64.0, v3 // VI: v_madmk_f16 v1, v2, 0x5400, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x54,0x00,0x00] v_madmk_f16 v1, v2, 64.0, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_madak_f16 v1, v2, v3, 64.0 // VI: v_madak_f16 v1, v2, v3, 0x5400 ; encoding: [0x02,0x07,0x02,0x4a,0x00,0x54,0x00,0x00] v_madak_f16 v1, v2, v3, 64.0 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_add_u16_e32 v1, v2, v3 // VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] v_add_u16_e32 v1, v2, v3 -// NOSICI: error: invalid operand for instruction -// NOSICI: v_add_u16 v1, v2, v3 clamp +// NOSICI: error: instruction not supported on this GPU // VI: v_add_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x26,0xd1,0x02,0x07,0x02,0x00] v_add_u16 v1, v2, v3 clamp // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_sub_u16_e32 v1, v2, v3 // VI: v_sub_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] v_sub_u16_e32 v1, v2, v3 -// NOSICI: error: invalid operand for instruction -// NOSICI: v_sub_u16 v1, v2, v3 clamp +// NOSICI: error: instruction not supported on this GPU // VI: v_sub_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x27,0xd1,0x02,0x07,0x02,0x00] v_sub_u16 v1, v2, v3 clamp // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_subrev_u16_e32 v1, v2, v3 // VI: v_subrev_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] v_subrev_u16_e32 v1, v2, v3 -// NOSICI: error: invalid operand for instruction -// NOSICI: v_subrev_u16 v1, v2, v3 clamp +// NOSICI: error: instruction not supported on this GPU // VI: v_subrev_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x28,0xd1,0x02,0x07,0x02,0x00] v_subrev_u16 v1, v2, v3 clamp // NOSICI: error: instruction not supported on 
this GPU -// NOSICI: v_mul_lo_u16_e32 v1, v2, v3 // VI: v_mul_lo_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] v_mul_lo_u16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_lshlrev_b16_e32 v1, v2, v3 // VI: v_lshlrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54] v_lshlrev_b16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_lshrrev_b16_e32 v1, v2, v3 // VI: v_lshrrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56] v_lshrrev_b16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_ashrrev_i16_e32 v1, v2, v3 // VI: v_ashrrev_i16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58] v_ashrrev_i16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_max_f16_e32 v1, v2, v3 // VI: v_max_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a] v_max_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_min_f16_e32 v1, v2, v3 // VI: v_min_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c] v_min_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_max_u16_e32 v1, v2, v3 // VI: v_max_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e] v_max_u16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_max_i16_e32 v1, v2, v3 // VI: v_max_i16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60] v_max_i16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_min_u16_e32 v1, v2, v3 // VI: v_min_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62] v_min_u16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_min_i16_e32 v1, v2, v3 // VI: v_min_i16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x64] v_min_i16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_ldexp_f16_e32 v1, v2, v3 // VI: v_ldexp_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x66] v_ldexp_f16_e32 v1, v2, v3 diff --git a/llvm/test/MC/AMDGPU/vop3-errs.s b/llvm/test/MC/AMDGPU/vop3-errs.s index 01cbb130f95c6..2d59d55ce9a26 100644 --- a/llvm/test/MC/AMDGPU/vop3-errs.s +++ b/llvm/test/MC/AMDGPU/vop3-errs.s @@ -58,29 +58,37 @@ v_cvt_f64_i32 v[5:6], s1 mul:3 // v_interp_mov_f32_e64 v5, p10, attr0.x high -// GCN: error: invalid operand for instruction +// GFX67: error: e64 variant of this instruction is not supported +// GFX89: error: invalid operand for instruction v_interp_mov_f32_e64 v5, p10, attr0.x v0 -// GCN: error: invalid operand for instruction +// GFX67: error: e64 variant of this instruction is not supported +// GFX89: error: invalid operand for instruction v_interp_p1_f32_e64 v5, v2, attr0.x high -// GCN: error: invalid operand for instruction +// GFX67: error: e64 variant of this instruction is not supported +// GFX89: error: invalid operand for instruction v_interp_p1_f32_e64 v5, v2, attr0.x v0 -// GCN: error: invalid operand for instruction +// GFX67: error: e64 variant of this instruction is not supported +// GFX89: error: invalid operand for instruction v_interp_p2_f32_e64 v255, v2, attr0.x high -// GCN: error: invalid operand for instruction +// GFX67: error: e64 variant of this instruction is not supported +// GFX89: error: invalid operand for instruction v_interp_p2_f32_e64 v255, v2, attr0.x v0 -// GCN: error: invalid operand for instruction +// GFX67: error: e64 variant of this instruction is not supported +// GFX89: error: invalid operand for instruction v_interp_p1ll_f16 v5, p0, attr31.x 
-// GCN: error: invalid operand for instruction +// GFX67: error: instruction not supported on this GPU +// GFX89: error: invalid operand for instruction v_interp_p1ll_f16 v5, v2, attr31.x v0 -// GCN: error: invalid operand for instruction +// GFX67: error: instruction not supported on this GPU +// GFX89: error: invalid operand for instruction v_interp_p2_f16 v5, v2, attr1.x, v3 mul:2 -// GFX67: error: not a valid operand +// GFX67: error: instruction not supported on this GPU // GFX89: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s index c98fc47093f83..5b1c7bdbaf133 100644 --- a/llvm/test/MC/AMDGPU/vop3-gfx9.s +++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s @@ -34,15 +34,15 @@ v_pack_b32_f16 v1, v2, v3 v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] // GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0xa0,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_pack_b32_f16 v5, v1, v2 op_sel:[0,1,0] // GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[0,1,0] ; encoding: [0x05,0x10,0xa0,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_pack_b32_f16 v5, v1, v2 op_sel:[0,0,1] // GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[0,0,1] ; encoding: [0x05,0x40,0xa0,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_xad_u32 v1, v2, v3, v4 // GFX9: v_xad_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf3,0xd1,0x02,0x07,0x12,0x04] @@ -66,27 +66,27 @@ v_max3_f16 v1, v2, v3, v4 v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,0] // GFX9: v_max3_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf7,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf7,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf7,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf7,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf7,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_i16 v1, v2, v3, v4 // GFX9: v_max3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf8,0xd1,0x02,0x07,0x12,0x04] @@ -94,27 +94,27 @@ v_max3_i16 v1, v2, v3, v4 v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,0] // GFX9: v_max3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. 
+// NOGCN: error: instruction not supported on this GPU v_max3_i16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf8,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_i16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf8,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf8,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf8,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf8,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_max3_u16 v1, v2, v3, v4 // GFX9: v_max3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf9,0xd1,0x02,0x07,0x12,0x04] @@ -138,23 +138,23 @@ v_mad_u32_u16 v5, v1, v2, v3 v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf1,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf1,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf1,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf1,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf1,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_mad_i32_i16 v5, v1, v2, v3 // GFX9: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf2,0xd1,0x01,0x05,0x0e,0x04] @@ -162,7 +162,7 @@ v_mad_i32_i16 v5, v1, v2, v3 v_mad_i32_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_i32_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf2,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_i16_f16 v5, v1, v2 // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x00] @@ -170,47 +170,47 @@ v_cvt_pknorm_i16_f16 v5, v1, v2 v_cvt_pknorm_i16_f16 v5, -v1, v2 // GFX9: v_cvt_pknorm_i16_f16 v5, -v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x20] -// NOGCN: error: not a valid operand. 
+// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_i16_f16 v5, v1, -v2 // GFX9: v_cvt_pknorm_i16_f16 v5, v1, -v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x40] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_i16_f16 v5, -v1, -v2 // GFX9: v_cvt_pknorm_i16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x60] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_i16_f16 v5, |v1|, v2 // GFX9: v_cvt_pknorm_i16_f16 v5, |v1|, v2 ; encoding: [0x05,0x01,0x99,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_i16_f16 v5, v1, |v2| // GFX9: v_cvt_pknorm_i16_f16 v5, v1, |v2| ; encoding: [0x05,0x02,0x99,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[0,0,0] // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,0,0] // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x99,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x99,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_u16_f16 v5, -v1, -v2 // GFX9: v_cvt_pknorm_u16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x9a,0xd2,0x01,0x05,0x02,0x60] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_u16_f16 v5, |v1|, |v2| // GFX9: v_cvt_pknorm_u16_f16 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x9a,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_u16_f16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_cvt_pknorm_u16_f16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9a,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_add_i16 v5, v1, v2 // GFX9: v_add_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x9e,0xd2,0x01,0x05,0x02,0x00] @@ -218,7 +218,7 @@ v_add_i16 v5, v1, v2 v_add_i16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_add_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9e,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_sub_i16 v5, v1, v2 // GFX9: v_sub_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x9f,0xd2,0x01,0x05,0x02,0x00] @@ -226,11 +226,11 @@ v_sub_i16 v5, v1, v2 v_sub_i16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_sub_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9f,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: not a valid operand. 
+// NOGCN: error: instruction not supported on this GPU v_sub_i16 v5, v1, v2 clamp // GFX9: v_sub_i16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x9f,0xd2,0x01,0x05,0x02,0x00] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_fma_f16_e64 v5, v1, v2, v3 // GFX9: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x04] @@ -238,29 +238,29 @@ v_fma_f16_e64 v5, v1, v2, v3 v_fma_f16 v5, v1, -v2, v3 // GFX9: v_fma_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x44] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, |v3| // GFX9: v_fma_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x06,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, v3 clamp // GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x06,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x06,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x06,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_fma_legacy_f16_e64 v5, v1, v2, v3 @@ -269,15 +269,15 @@ v_fma_legacy_f16_e64 v5, v1, v2, v3 v_fma_legacy_f16 v5, -v1, v2, v3 // GFX9: v_fma_legacy_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x24] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_fma_legacy_f16 v5, v1, |v2|, v3 // GFX9: v_fma_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xee,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_fma_legacy_f16 v5, v1, v2, v3 clamp // GFX9: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_div_fixup_f16_e64 v5, 0.5, v2, v3 // GFX9: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04] @@ -293,29 +293,29 @@ v_div_fixup_f16 v5, v1, v2, 0.5 v_div_fixup_f16 v5, -v1, v2, v3 // GFX9: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0x24] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, |v1|, v2, v3 // GFX9: v_div_fixup_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0x07,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, v3 clamp // GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x07,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x07,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x07,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_div_fixup_legacy_f16_e64 v5, 0.5, v2, v3 @@ -332,15 +332,15 @@ v_div_fixup_legacy_f16 v5, v1, v2, 0.5 v_div_fixup_legacy_f16 v5, -v1, v2, v3 // GFX9: v_div_fixup_legacy_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_div_fixup_legacy_f16 v5, v1, |v2|, v3 // GFX9: v_div_fixup_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xef,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp // GFX9: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_mad_f16_e64 v5, 0.5, v2, v3 // GFX9: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04] @@ -356,45 +356,45 @@ v_mad_f16 v5, v1, v2, 0.5 v_mad_f16 v5, v1, v2, -v3 // GFX9: v_mad_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x84] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, |v3| // GFX9: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x03,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x03,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x03,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x03,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x03,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x03,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 clamp // GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU v_mad_i16_e64 v5, 0, v2, v3 // GFX9: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04] @@ -411,16 +411,16 @@ v_mad_i16 v5, v1, v2, -4.0 v_mad_i16 v5, v1, v2, v3 clamp // GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x05,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x05,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_mad_legacy_f16_e64 v5, 0.5, v2, v3 @@ -437,15 +437,15 @@ v_mad_legacy_f16 v5, v1, v2, 0.5 v_mad_legacy_f16 v5, v1, -v2, v3 // GFX9: v_mad_legacy_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x44] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_f16 v5, v1, |v2|, v3 // GFX9: v_mad_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xea,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: not a valid operand. 
+// NOGCN: error: instruction not supported on this GPU v_mad_legacy_f16 v5, v1, v2, v3 clamp // GFX9: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_i16_e64 v5, 0, v2, v3 // GFX9: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04] @@ -461,7 +461,7 @@ v_mad_legacy_i16 v5, v1, v2, -4.0 v_mad_legacy_i16 v5, v1, v2, -4.0 clamp // NOGFX9: error: invalid literal operand -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] @@ -477,7 +477,7 @@ v_mad_legacy_u16 v5, v1, v2, -4.0 v_mad_legacy_u16 v5, v1, v2, -4.0 clamp // NOGFX9: error: invalid literal operand -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_mad_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] @@ -494,86 +494,86 @@ v_mad_u16 v5, v1, v2, -4.0 v_mad_u16 v5, v1, v2, v3 clamp // GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x04,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x04,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x04,0xd2,0x01,0x05,0x0e,0x04] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: instruction not supported on this GPU v_interp_p2_f16 v5, v2, attr0.x, v3 // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU v_interp_p2_f16 v5, -v2, attr0.x, v3 // GFX9: v_interp_p2_f16 v5, -v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x44] -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU v_interp_p2_f16 v5, v2, attr0.x, |v3| // GFX9: v_interp_p2_f16 v5, v2, attr0.x, |v3| ; encoding: [0x05,0x04,0x77,0xd2,0x00,0x04,0x0e,0x04] -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU v_interp_p2_f16 v5, v2, attr0.w, v3 // GFX9: v_interp_p2_f16 v5, v2, attr0.w, v3 ; encoding: [0x05,0x00,0x77,0xd2,0xc0,0x04,0x0e,0x04] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU v_interp_p2_f16 v5, v2, attr0.x, v3 high // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 high ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x05,0x0e,0x04] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU v_interp_p2_f16 v5, v2, attr0.x, v3 clamp // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04] -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 // GFX9: v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x1f,0x04,0x0e,0x04] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_interp_p2_legacy_f16 v5, -v2, attr0.x, v3 // GFX9: v_interp_p2_legacy_f16 v5, -v2, attr0.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x04,0x0e,0x44] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_interp_p2_legacy_f16 v5, v2, attr0.x, |v3| // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, |v3| ; encoding: [0x05,0x04,0x76,0xd2,0x00,0x04,0x0e,0x04] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_interp_p2_legacy_f16 v5, v2, attr0.w, v3 // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.w, v3 ; encoding: [0x05,0x00,0x76,0xd2,0xc0,0x04,0x0e,0x04] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 high // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 high ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x05,0x0e,0x04] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 clamp // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_cvt_norm_i16_f16_e64 v5, -v1 // GFX9: v_cvt_norm_i16_f16_e64 v5, -v1 ; encoding: [0x05,0x00,0x8d,0xd1,0x01,0x01,0x00,0x20] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_norm_i16_f16_e64 v5, |v1| // GFX9: v_cvt_norm_i16_f16_e64 v5, |v1| ; encoding: [0x05,0x01,0x8d,0xd1,0x01,0x01,0x00,0x00] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_norm_u16_f16_e64 v5, -v1 // GFX9: v_cvt_norm_u16_f16_e64 v5, -v1 ; encoding: [0x05,0x00,0x8e,0xd1,0x01,0x01,0x00,0x20] -// NOGCN: error: not a valid operand. +// NOGCN: error: instruction not supported on this GPU v_cvt_norm_u16_f16_e64 v5, |v1| // GFX9: v_cvt_norm_u16_f16_e64 v5, |v1| ; encoding: [0x05,0x01,0x8e,0xd1,0x01,0x01,0x00,0x00] -// NOGCN: error: not a valid operand. 
+// NOGCN: error: instruction not supported on this GPU v_sat_pk_u8_i16_e64 v5, -1 // GFX9: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0x8f,0xd1,0xc1,0x00,0x00,0x00] @@ -584,53 +584,54 @@ v_sat_pk_u8_i16_e64 v5, v255 // NOGCN: error: instruction not supported on this GPU v_screen_partition_4se_b32_e64 v5, v1 -// GXF9: [0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00] // NOGCN: error: instruction not supported on this GPU // GFX9: v_screen_partition_4se_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00] v_screen_partition_4se_b32_e64 v5, -1 -// GXF9: [0x05,0x00,0x77,0xd1,0xc1,0x00,0x00,0x00] // NOGCN: error: instruction not supported on this GPU // GFX9: v_screen_partition_4se_b32_e64 v5, -1 ; encoding: [0x05,0x00,0x77,0xd1,0xc1,0x00,0x00,0x00] v_add_u32 v84, v13, s31 clamp // GFX9: v_add_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x34,0xd1,0x0d,0x3f,0x00,0x00] -// NOGCN: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: invalid operand for instruction v_sub_u32 v84, v13, s31 clamp // GFX9: v_sub_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x35,0xd1,0x0d,0x3f,0x00,0x00] -// NOGCN: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: invalid operand for instruction v_subrev_u32 v84, v13, s31 clamp // GFX9: v_subrev_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x36,0xd1,0x0d,0x3f,0x00,0x00] -// NOGCN: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: invalid operand for instruction v_addc_co_u32 v84, s[4:5], v13, v31, vcc clamp // GFX9: v_addc_co_u32_e64 v84, s[4:5], v13, v31, vcc clamp ; encoding: [0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_subb_co_u32 v84, s[2:3], v13, v31, vcc clamp // GFX9: v_subb_co_u32_e64 v84, s[2:3], v13, v31, vcc clamp ; encoding: [0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_subbrev_co_u32 v84, vcc, v13, v31, s[6:7] clamp // GFX9: v_subbrev_co_u32_e64 v84, vcc, v13, v31, s[6:7] clamp ; encoding: [0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00] -// NOGCN: error: invalid operand for instruction +// NOGCN: error: instruction not supported on this GPU v_add_co_u32 v84, s[4:5], v13, v31 clamp // GFX9: v_add_co_u32_e64 v84, s[4:5], v13, v31 clamp ; encoding: [0x54,0x84,0x19,0xd1,0x0d,0x3f,0x02,0x00] // NOSICI: error: integer clamping is not supported on this GPU -// NOVI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU v_sub_co_u32 v84, s[2:3], v13, v31 clamp // GFX9: v_sub_co_u32_e64 v84, s[2:3], v13, v31 clamp ; encoding: [0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x02,0x00] // NOSICI: error: integer clamping is not supported on this GPU -// NOVI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU v_subrev_co_u32 v84, vcc, v13, v31 clamp // GFX9: v_subrev_co_u32_e64 v84, vcc, v13, v31 clamp ; encoding: [0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x02,0x00] // NOSICI: error: integer clamping is not supported on this GPU -// NOVI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU v_addc_co_u32 v84, vcc, v13, v31, vcc // GFX9: v_addc_co_u32_e32 v84, vcc, v13, v31, vcc ; encoding: [0x0d,0x3f,0xa8,0x38] @@ -662,7 +663,8 @@ v_add_i32 v1, v2, v3 
v_add_i32 v1, v2, v3 clamp
// GFX9: v_add_i32 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x9c,0xd2,0x02,0x07,0x02,0x00]
-// NOGCN: error: invalid operand for instruction
+// NOSICI: error: invalid operand for instruction
+// NOVI: error: instruction not supported on this GPU
v_sub_i32 v1, v2, v3
// GFX9: v_sub_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x9d,0xd2,0x02,0x07,0x02,0x00]
@@ -670,7 +672,8 @@ v_sub_i32 v1, v2, v3
v_sub_i32 v1, v2, v3 clamp
// GFX9: v_sub_i32 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x9d,0xd2,0x02,0x07,0x02,0x00]
-// NOGCN: error: invalid operand for instruction
+// NOSICI: error: invalid operand for instruction
+// NOVI: error: instruction not supported on this GPU
//===----------------------------------------------------------------------===//
// Validate register size checks (bug 37943)
//===----------------------------------------------------------------------===//
@@ -720,35 +723,43 @@ v_add_f32 v0, v0, s[0:1]
// NOGFX9: error: invalid operand for instruction
v_add_f32 v0, v0, v[0:1]
-// NOGCN: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: invalid operand for instruction
v_add_f16 v0, s[0:1], v0
-// NOGCN: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: invalid operand for instruction
v_add_f16 v0, v[0:1], v0
-// NOGCN: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: invalid operand for instruction
v_add_f16 v0, v0, s[0:1]
-// NOGCN: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: invalid operand for instruction
v_add_f16 v0, v0, v[0:1]
-// NOGCN: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: invalid operand for instruction
v_add_u16 v0, s[0:1], v0
-// NOGCN: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: invalid operand for instruction
v_add_u16 v0, v[0:1], v0
-// NOGCN: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: invalid operand for instruction
v_add_u16 v0, v0, s[0:1]
-// NOGCN: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: invalid operand for instruction
v_add_u16 v0, v0, v[0:1]
diff --git a/llvm/test/MC/AMDGPU/vop3.s b/llvm/test/MC/AMDGPU/vop3.s
index 2c083e7024e3c..580b928397f6e 100644
--- a/llvm/test/MC/AMDGPU/vop3.s
+++ b/llvm/test/MC/AMDGPU/vop3.s
@@ -289,28 +289,28 @@ v_mac_f32_e64 v0, -v1, |v2|
v_mac_f16_e64 v0, 0.5, flat_scratch_lo
// VI: v_mac_f16_e64 v0, 0.5, flat_scratch_lo ; encoding: [0x00,0x00,0x23,0xd1,0xf0,0xcc,0x00,0x00]
// NOCI: error: instruction not supported on this GPU
-// NOSI: error: register not available on this GPU
+// NOSI: error: instruction not supported on this GPU
v_mac_f16_e64 v0, -4.0, flat_scratch_lo
// VI: v_mac_f16_e64 v0, -4.0, flat_scratch_lo ; encoding: [0x00,0x00,0x23,0xd1,0xf7,0xcc,0x00,0x00]
// NOCI: error: instruction not supported on this GPU
-// NOSI: error: register not available on this GPU
+// NOSI: error: instruction not supported on this GPU
v_mac_f16_e64 v0, flat_scratch_lo, -4.0
// VI: v_mac_f16_e64 v0, flat_scratch_lo, -4.0 ; encoding: [0x00,0x00,0x23,0xd1,0x66,0xee,0x01,0x00]
// NOCI: error: instruction not supported on this GPU
-// NOSI: error: register not available on this GPU
+// NOSI: error: instruction not supported on this GPU
v_add_u32 v84, vcc, v13, s31 clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_add_u32_e64 v84, vcc, v13, s31 clamp ; encoding: [0x54,0xea,0x19,0xd1,0x0d,0x3f,0x00,0x00]
v_sub_u32 v84, s[2:3], v13, s31 clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_sub_u32_e64 v84, s[2:3], v13, s31 clamp ; encoding: [0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x00,0x00]
v_subrev_u32 v84, vcc, v13, s31 clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_subrev_u32_e64 v84, vcc, v13, s31 clamp ; encoding: [0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x00,0x00]
v_addc_u32 v84, s[4:5], v13, v31, vcc clamp
@@ -504,15 +504,15 @@ v_fma_f16 v5, v1, v2, 0.5
v_fma_f16 v5, -v1, -v2, -v3
// VI: v_fma_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0xe4]
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
v_fma_f16 v5, |v1|, |v2|, |v3|
// VI: v_fma_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xee,0xd1,0x01,0x05,0x0e,0x04]
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
v_fma_f16 v5, v1, v2, v3 clamp
// VI: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04]
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
v_div_fixup_f16_e64 v5, v1, v2, v3
// VI: v_div_fixup_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x04]
@@ -536,15 +536,15 @@ v_div_fixup_f16 v5, v1, v2, -4.0
v_div_fixup_f16 v5, -v1, v2, v3
// VI: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24]
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
v_div_fixup_f16 v5, v1, |v2|, v3
// VI: v_div_fixup_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xef,0xd1,0x01,0x05,0x0e,0x04]
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
v_div_fixup_f16 v5, v1, v2, v3 clamp
// VI: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04]
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
v_mad_f16_e64 v5, v1, v2, v3
// VI: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x04]
@@ -564,15 +564,15 @@ v_mad_f16 v5, v1, v2, 0.5
v_mad_f16 v5, v1, -v2, v3
// VI: v_mad_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x44]
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
v_mad_f16 v5, v1, v2, |v3|
// VI: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0xea,0xd1,0x01,0x05,0x0e,0x04]
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
v_mad_f16 v5, v1, v2, v3 clamp
// VI: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04]
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
v_mad_i16_e64 v5, -1, v2, v3
// VI: v_mad_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0xc1,0x04,0x0e,0x04]
@@ -637,19 +637,19 @@ v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp
v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp
// VI: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe5,0xd1,0x01,0x05,0x0e,0x04]
// NOCI: error: integer clamping is not supported on this GPU
-// NOSI: error: invalid operand for instruction
+// NOSI: error: instruction not supported on this GPU
v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp
// VI: v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp ; encoding: [0xfc,0x80,0xe7,0xd1,0x01,0x05,0x0e,0x04]
// NOCI: error: integer clamping is not supported on this GPU
-// NOSI: error: invalid operand for instruction
+// NOSI: error: instruction not supported on this GPU
v_mad_u16 v5, v1, v2, v3 clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0x0e,0x04]
v_mad_i16 v5, v1, v2, v3 clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04]
//
@@ -657,23 +657,23 @@ v_mad_i16 v5, v1, v2, v3 clamp
//
v_interp_mov_f32_e64 v5, p10, attr0.x
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_mov_f32_e64 v5, p10, attr0.x ; encoding: [0x05,0x00,0x72,0xd2,0x00,0x00,0x00,0x00]
v_interp_mov_f32_e64 v5, p10, attr32.x
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_mov_f32_e64 v5, p10, attr32.x ; encoding: [0x05,0x00,0x72,0xd2,0x20,0x00,0x00,0x00]
v_interp_mov_f32_e64 v5, p20, attr0.x
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_mov_f32_e64 v5, p20, attr0.x ; encoding: [0x05,0x00,0x72,0xd2,0x00,0x02,0x00,0x00]
v_interp_mov_f32_e64 v5, p10, attr0.w
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_mov_f32_e64 v5, p10, attr0.w ; encoding: [0x05,0x00,0x72,0xd2,0xc0,0x00,0x00,0x00]
v_interp_mov_f32_e64 v5, p10, attr0.x clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_mov_f32_e64 v5, p10, attr0.x clamp ; encoding: [0x05,0x80,0x72,0xd2,0x00,0x00,0x00,0x00]
v_interp_mov_f32 v5, p10, attr0.x clamp
@@ -681,15 +681,15 @@ v_interp_mov_f32 v5, p10, attr0.x clamp
// VI: v_interp_mov_f32_e64 v5, p10, attr0.x clamp ; encoding: [0x05,0x80,0x72,0xd2,0x00,0x00,0x00,0x00]
v_interp_mov_f32_e64 v5, p10, attr0.x mul:2
-// NOSICI: error: not a valid operand
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_mov_f32_e64 v5, p10, attr0.x mul:2 ; encoding: [0x05,0x00,0x72,0xd2,0x00,0x00,0x00,0x08]
v_interp_mov_f32_e64 v5, p10, attr0.x mul:4
-// NOSICI: error: not a valid operand
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_mov_f32_e64 v5, p10, attr0.x mul:4 ; encoding: [0x05,0x00,0x72,0xd2,0x00,0x00,0x00,0x10]
v_interp_mov_f32_e64 v5, p10, attr0.x div:2
-// NOSICI: error: not a valid operand
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_mov_f32_e64 v5, p10, attr0.x div:2 ; encoding: [0x05,0x00,0x72,0xd2,0x00,0x00,0x00,0x18]
v_interp_mov_f32 v5, p10, attr0.x div:2
@@ -698,23 +698,23 @@ v_interp_mov_f32 v5, p10, attr0.x div:2
v_interp_p1_f32_e64 v5, v2, attr0.x
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p1_f32_e64 v5, v2, attr0.x ; encoding: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x00]
v_interp_p1_f32_e64 v5, v2, attr0.y
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p1_f32_e64 v5, v2, attr0.y ; encoding: [0x05,0x00,0x70,0xd2,0x40,0x04,0x02,0x00]
v_interp_p1_f32_e64 v5, -v2, attr0.x
-// NOSICI: error: not a valid operand
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p1_f32_e64 v5, -v2, attr0.x ; encoding: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x40]
v_interp_p1_f32_e64 v5, |v2|, attr0.x
-// NOSICI: error: not a valid operand
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p1_f32_e64 v5, |v2|, attr0.x ; encoding: [0x05,0x02,0x70,0xd2,0x00,0x04,0x02,0x00]
v_interp_p1_f32_e64 v5, v2, attr0.x clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p1_f32_e64 v5, v2, attr0.x clamp ; encoding: [0x05,0x80,0x70,0xd2,0x00,0x04,0x02,0x00]
v_interp_p1_f32 v5, v2, attr0.x clamp
@@ -722,137 +722,137 @@ v_interp_p1_f32 v5, v2, attr0.x clamp
// VI: v_interp_p1_f32_e64 v5, v2, attr0.x clamp ; encoding: [0x05,0x80,0x70,0xd2,0x00,0x04,0x02,0x00]
v_interp_p1_f32_e64 v5, v2, attr0.x mul:2
-// NOSICI: error: not a valid operand
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p1_f32_e64 v5, v2, attr0.x mul:2 ; encoding: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x08]
v_interp_p2_f32_e64 v255, v2, attr0.x
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p2_f32_e64 v255, v2, attr0.x ; encoding: [0xff,0x00,0x71,0xd2,0x00,0x04,0x02,0x00]
v_interp_p2_f32_e64 v5, v2, attr31.x
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p2_f32_e64 v5, v2, attr31.x ; encoding: [0x05,0x00,0x71,0xd2,0x1f,0x04,0x02,0x00]
v_interp_p2_f32_e64 v5, -v2, attr0.x
-// NOSICI: error: not a valid operand
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p2_f32_e64 v5, -v2, attr0.x ; encoding: [0x05,0x00,0x71,0xd2,0x00,0x04,0x02,0x40]
v_interp_p2_f32_e64 v5, |v2|, attr0.x
-// NOSICI: error: not a valid operand
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p2_f32_e64 v5, |v2|, attr0.x ; encoding: [0x05,0x02,0x71,0xd2,0x00,0x04,0x02,0x00]
v_interp_p2_f32_e64 v5, v2, attr0.x clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p2_f32_e64 v5, v2, attr0.x clamp ; encoding: [0x05,0x80,0x71,0xd2,0x00,0x04,0x02,0x00]
v_interp_p2_f32_e64 v5, v2, attr0.x div:2
-// NOSICI: error: not a valid operand
+// NOSICI: error: e64 variant of this instruction is not supported
// VI: v_interp_p2_f32_e64 v5, v2, attr0.x div:2 ; encoding: [0x05,0x00,0x71,0xd2,0x00,0x04,0x02,0x18]
v_interp_p1ll_f16 v5, v2, attr31.x
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1ll_f16 v5, v2, attr31.x ; encoding: [0x05,0x00,0x74,0xd2,0x1f,0x04,0x02,0x00]
v_interp_p1ll_f16 v5, v2, attr0.w
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1ll_f16 v5, v2, attr0.w ; encoding: [0x05,0x00,0x74,0xd2,0xc0,0x04,0x02,0x00]
v_interp_p1ll_f16 v5, -v2, attr0.x
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1ll_f16 v5, -v2, attr0.x ; encoding: [0x05,0x00,0x74,0xd2,0x00,0x04,0x02,0x40]
v_interp_p1ll_f16 v5, |v2|, attr0.x
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1ll_f16 v5, |v2|, attr0.x ; encoding: [0x05,0x02,0x74,0xd2,0x00,0x04,0x02,0x00]
v_interp_p1ll_f16 v5, v2, attr0.x high
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1ll_f16 v5, v2, attr0.x high ; encoding: [0x05,0x00,0x74,0xd2,0x00,0x05,0x02,0x00]
v_interp_p1ll_f16 v5, v2, attr0.x clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1ll_f16 v5, v2, attr0.x clamp ; encoding: [0x05,0x80,0x74,0xd2,0x00,0x04,0x02,0x00]
v_interp_p1ll_f16 v5, v2, attr0.x mul:4
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1ll_f16 v5, v2, attr0.x mul:4 ; encoding: [0x05,0x00,0x74,0xd2,0x00,0x04,0x02,0x10]
v_interp_p1lv_f16 v5, v2, attr1.x, v3
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1lv_f16 v5, v2, attr1.x, v3 ; encoding: [0x05,0x00,0x75,0xd2,0x01,0x04,0x0e,0x04]
v_interp_p1lv_f16 v5, v2, attr0.z, v3
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1lv_f16 v5, v2, attr0.z, v3 ; encoding: [0x05,0x00,0x75,0xd2,0x80,0x04,0x0e,0x04]
v_interp_p1lv_f16 v5, -v2, attr0.x, v3
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1lv_f16 v5, -v2, attr0.x, v3 ; encoding: [0x05,0x00,0x75,0xd2,0x00,0x04,0x0e,0x44]
v_interp_p1lv_f16 v5, v2, attr0.x, -v3
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1lv_f16 v5, v2, attr0.x, -v3 ; encoding: [0x05,0x00,0x75,0xd2,0x00,0x04,0x0e,0x84]
v_interp_p1lv_f16 v5, |v2|, attr0.x, v3
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1lv_f16 v5, |v2|, attr0.x, v3 ; encoding: [0x05,0x02,0x75,0xd2,0x00,0x04,0x0e,0x04]
v_interp_p1lv_f16 v5, v2, attr0.x, |v3|
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1lv_f16 v5, v2, attr0.x, |v3| ; encoding: [0x05,0x04,0x75,0xd2,0x00,0x04,0x0e,0x04]
v_interp_p1lv_f16 v5, v2, attr0.x, v3 high
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1lv_f16 v5, v2, attr0.x, v3 high ; encoding: [0x05,0x00,0x75,0xd2,0x00,0x05,0x0e,0x04]
v_interp_p1lv_f16 v5, v2, attr0.x, v3 clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1lv_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x75,0xd2,0x00,0x04,0x0e,0x04]
v_interp_p1lv_f16 v5, v2, attr0.x, v3 mul:2
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1lv_f16 v5, v2, attr0.x, v3 mul:2 ; encoding: [0x05,0x00,0x75,0xd2,0x00,0x04,0x0e,0x0c]
v_interp_p1lv_f16 v5, v2, attr0.x, v3 div:2
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p1lv_f16 v5, v2, attr0.x, v3 div:2 ; encoding: [0x05,0x00,0x75,0xd2,0x00,0x04,0x0e,0x1c]
v_interp_p2_f16 v5, v2, attr1.x, v3
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, v2, attr1.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x01,0x04,0x0e,0x04]
v_interp_p2_f16 v5, v2, attr32.x, v3
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, v2, attr32.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x20,0x04,0x0e,0x04]
v_interp_p2_f16 v5, v2, attr0.w, v3
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, v2, attr0.w, v3 ; encoding: [0x05,0x00,0x76,0xd2,0xc0,0x04,0x0e,0x04]
v_interp_p2_f16 v5, -v2, attr0.x, v3
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, -v2, attr0.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x04,0x0e,0x44]
v_interp_p2_f16 v5, v2, attr0.x, -v3
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, v2, attr0.x, -v3 ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x04,0x0e,0x84]
v_interp_p2_f16 v5, |v2|, attr0.x, v3
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, |v2|, attr0.x, v3 ; encoding: [0x05,0x02,0x76,0xd2,0x00,0x04,0x0e,0x04]
v_interp_p2_f16 v5, v2, attr0.x, |v3|
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, v2, attr0.x, |v3| ; encoding: [0x05,0x04,0x76,0xd2,0x00,0x04,0x0e,0x04]
v_interp_p2_f16 v5, v2, attr0.x, v3 high
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, v2, attr0.x, v3 high ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x05,0x0e,0x04]
v_interp_p2_f16 v5, v2, attr0.x, v3 clamp
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04]
diff --git a/llvm/test/MC/AMDGPU/vop_dpp.s b/llvm/test/MC/AMDGPU/vop_dpp.s
index e0dfc255a89a1..890b7e7a8f480 100644
--- a/llvm/test/MC/AMDGPU/vop_dpp.s
+++ b/llvm/test/MC/AMDGPU/vop_dpp.s
@@ -3,7 +3,7 @@
// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error:
-// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
+// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --check-prefix=NOCI --implicit-check-not=error:
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error:
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error:
@@ -256,103 +256,105 @@ v_frexp_exp_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
// VI9: v_frexp_mant_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x68,0x02,0x7e,0x00,0x01,0x09,0xa1]
v_frexp_mant_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
// VI9: v_log_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x98,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// NOSI: error: instruction not supported on this GPU
+// NOCI: error: not a valid operand.
v_log_legacy_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
// VI9: v_exp_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x96,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// NOSI: error: instruction not supported on this GPU
+// NOCI: error: not a valid operand.
v_exp_legacy_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
// VI9: v_cvt_f16_u16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x72,0x02,0x7e,0x00,0x01,0x09,0xa1]
v_cvt_f16_u16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
// VI9: v_cvt_f16_i16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x74,0x02,0x7e,0x00,0x01,0x09,0xa1]
v_cvt_f16_i16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
// VI9: v_cvt_u16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x76,0x02,0x7e,0x00,0x01,0x09,0xa1]
v_cvt_u16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
// VI9: v_cvt_i16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x78,0x02,0x7e,0x00,0x01,0x09,0xa1]
v_cvt_i16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
// VI9: v_rcp_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7a,0x02,0x7e,0x00,0x01,0x09,0xa1]
v_rcp_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
// VI9: v_sqrt_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7c,0x02,0x7e,0x00,0x01,0x09,0xa1]
v_sqrt_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
// VI9: v_rsq_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7e,0x02,0x7e,0x00,0x01,0x09,0xa1]
v_rsq_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU // VI9: v_log_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x80,0x02,0x7e,0x00,0x01,0x09,0xa1] v_log_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_exp_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x82,0x02,0x7e,0x00,0x01,0x09,0xa1] v_exp_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_frexp_mant_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x84,0x02,0x7e,0x00,0x01,0x09,0xa1] v_frexp_mant_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_frexp_exp_i16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x86,0x02,0x7e,0x00,0x01,0x09,0xa1] v_frexp_exp_i16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_floor_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x88,0x02,0x7e,0x00,0x01,0x09,0xa1] v_floor_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_ceil_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ceil_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_trunc_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_trunc_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_rndne_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rndne_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_fract_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x90,0x02,0x7e,0x00,0x01,0x09,0xa1] v_fract_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_sin_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x92,0x02,0x7e,0x00,0x01,0x09,0xa1] v_sin_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_cos_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x94,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cos_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // GFX9: v_cvt_norm_i16_f16_dpp v5, |v1| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x9a,0x0a,0x7e,0x01,0xe4,0x20,0x00] -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU v_cvt_norm_i16_f16_dpp v5, |v1| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX9: v_cvt_norm_u16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x9c,0x0a,0x7e,0x01,0x1b,0x00,0x00] -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU v_cvt_norm_u16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // GFX9: v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x9e,0x0a,0x7e,0x01,0x2f,0x01,0x00] -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU // GFX9: v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 ; encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0xe4,0x08,0x00] v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 @@ -453,139 +455,139 @@ v_or_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // VI9: v_xor_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x2a,0x02,0x01,0x09,0xa1] v_xor_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_add_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3e,0x02,0x01,0x09,0xa1] v_add_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_sub_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x40,0x02,0x01,0x09,0xa1] v_sub_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_subrev_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x42,0x02,0x01,0x09,0xa1] v_subrev_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_mul_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x44,0x02,0x01,0x09,0xa1] v_mul_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_mac_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x46,0x02,0x01,0x09,0xa1] v_mac_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // VI9: v_add_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x4c,0x02,0x01,0x09,0xa1] v_add_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_sub_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x4e,0x02,0x01,0x09,0xa1] v_sub_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_subrev_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x50,0x02,0x01,0x09,0xa1] v_subrev_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_mul_lo_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x52,0x02,0x01,0x09,0xa1] v_mul_lo_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_lshlrev_b16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x54,0x02,0x01,0x09,0xa1] v_lshlrev_b16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_lshrrev_b16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x56,0x02,0x01,0x09,0xa1] v_lshrrev_b16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_ashrrev_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x58,0x02,0x01,0x09,0xa1] v_ashrrev_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_max_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5a,0x02,0x01,0x09,0xa1] v_max_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_min_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5c,0x02,0x01,0x09,0xa1] v_min_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_max_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5e,0x02,0x01,0x09,0xa1] v_max_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_max_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x60,0x02,0x01,0x09,0xa1] v_max_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // VI9: v_min_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x62,0x02,0x01,0x09,0xa1] v_min_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_min_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x64,0x02,0x01,0x09,0xa1] v_min_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI9: v_ldexp_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x66,0x02,0x01,0x09,0xa1] v_ldexp_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOGFX9: error: not a valid operand. // VI: v_add_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1] v_add_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOGFX9: error: not a valid operand. // VI: v_sub_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1] v_sub_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOGFX9: error: not a valid operand. // VI: v_subrev_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1] v_subrev_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // NOSICI: error: not a valid operand. -// NOGFX9: error: not a valid operand. +// NOGFX9: error: instruction not supported on this GPU // VI: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1] v_addc_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // NOSICI: error: not a valid operand. -// NOGFX9: error: not a valid operand. +// NOGFX9: error: instruction not supported on this GPU // VI: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1] v_subb_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // NOSICI: error: not a valid operand. -// NOGFX9: error: not a valid operand. +// NOGFX9: error: instruction not supported on this GPU // VI: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1] v_subbrev_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU // GFX9: v_add_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1] v_add_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. 
+// NOVI: error: instruction not supported on this GPU
// GFX9: v_sub_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
v_sub_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
// NOSICI: error: not a valid operand.
-// NOVI: error: not a valid operand.
+// NOVI: error: instruction not supported on this GPU
// GFX9: v_subrev_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
v_subrev_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
-// NOVI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: instruction not supported on this GPU
// GFX9: v_addc_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
v_addc_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
-// NOVI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: instruction not supported on this GPU
// GFX9: v_subb_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
v_subb_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand.
-// NOVI: error: not a valid operand.
+// NOSICI: error: instruction not supported on this GPU
+// NOVI: error: instruction not supported on this GPU
// GFX9: v_subbrev_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
v_subbrev_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
@@ -635,32 +637,32 @@ v_add_f32 v0, v1, s45 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
// Validate register size checks (bug 37943)
//===----------------------------------------------------------------------===//
-// NOSICI: error: not a valid operand
+// NOSICI: error: dpp variant of this instruction is not supported
// NOVI: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
v_add_f32_dpp v5, v[1:2], v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// NOSICI: error: not a valid operand
+// NOSICI: error: dpp variant of this instruction is not supported
// NOVI: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
v_add_f32_dpp v5, v[1:3], v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// NOSICI: error: not a valid operand
+// NOSICI: error: dpp variant of this instruction is not supported
// NOVI: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
v_add_f32_dpp v5, v1, v[1:2] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// NOSICI: error: not a valid operand
+// NOSICI: error: dpp variant of this instruction is not supported
// NOVI: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
v_add_f32_dpp v5, v1, v[1:4] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// NOVI: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
v_add_f16 v1, v[2:3], v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
-// NOSICI: error: not a valid operand
+// NOSICI: error: instruction not supported on this GPU
// NOVI: error: invalid operand for instruction
// NOGFX9: error: invalid operand for instruction
v_add_f16 v1, v3, v[2:3] row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
diff --git a/llvm/test/MC/AMDGPU/vop_sdwa.s b/llvm/test/MC/AMDGPU/vop_sdwa.s
index 9a4283e73e384..222e5dd4644b7 100644
--- a/llvm/test/MC/AMDGPU/vop_sdwa.s
+++ b/llvm/test/MC/AMDGPU/vop_sdwa.s
@@ -63,11 +63,11 @@ v_mov_b32 v1, v0 clamp src0_sel:WORD_1
// GFX89: v_trunc_f32_sdwa v1, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x38,0x02,0x7e,0x00,0x36,0x05,0x00]
v_trunc_f32 v1, v0 clamp dst_sel:DWORD src0_sel:WORD_1
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: sdwa variant of this instruction is not supported
// GFX89: v_mov_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x16,0x06,0x00]
v_mov_b32_sdwa v1, v0
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: sdwa variant of this instruction is not supported
// GFX89: v_add_f32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x05,0x06]
v_add_f32_sdwa v0, v0, v0 dst_unused:UNUSED_PAD src0_sel:WORD_1
@@ -79,7 +79,7 @@ v_min_f32 v0, v0, v0 clamp dst_sel:DWORD src1_sel:BYTE_2
// GFX89: v_and_b32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x06,0x02]
v_and_b32 v0, v0, v0 dst_unused:UNUSED_PAD src1_sel:BYTE_2
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: sdwa variant of this instruction is not supported
// GFX89: v_mul_i32_i24_sdwa v1, v2, v3 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x06,0x02,0x0c,0x02,0x36,0x06,0x06]
v_mul_i32_i24_sdwa v1, v2, v3 clamp
@@ -103,7 +103,7 @@ v_add_f32 v0, -|v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src
// GFX89: v_min_f32_sdwa v0, |v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x06,0x25,0x12]
v_min_f32 v0, abs(v0), -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
-// NOSICI: error: not a valid operand.
+// NOSICI: error: sdwa variant of this instruction is not supported
// GFX89: v_mov_b32_sdwa v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x16,0x0e,0x00]
v_mov_b32_sdwa v1, sext(v0)
@@ -111,7 +111,7 @@ v_mov_b32_sdwa v1, sext(v0)
// GFX89: v_and_b32_sdwa v0, sext(v0), sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x0e,0x0a]
v_and_b32 v0, sext(v0), sext(v0) dst_unused:UNUSED_PAD src1_sel:BYTE_2
-// NOSICI: error: not a valid operand.
+// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmp_class_f32 vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x12,0x0c] // GFX9: v_cmp_class_f32_sdwa vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x12,0x0c] v_cmp_class_f32_sdwa vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 @@ -120,7 +120,7 @@ v_cmp_class_f32_sdwa vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 // Check VOP1 opcodes //===----------------------------------------------------------------------===// -// NOSICI: error: instruction not supported on this GPU +// NOSICI: error: sdwa variant of this instruction is not supported // GFX89: v_nop ; encoding: [0xf9,0x00,0x00,0x7e,0x00,0x00,0x00,0x00] v_nop_sdwa @@ -261,110 +261,110 @@ v_frexp_exp_i32_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 v_frexp_mant_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 // GFX89: v_log_legacy_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x98,0x02,0x7e,0x00,0x06,0x05,0x00] -// NOSI: error: not a valid operand. +// NOSI: error: instruction not supported on this GPU // NOCI: error: invalid operand for instruction v_log_legacy_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 // GFX89: v_exp_legacy_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x96,0x02,0x7e,0x00,0x06,0x05,0x00] -// NOSI: error: not a valid operand. +// NOSI: error: instruction not supported on this GPU // NOCI: error: invalid operand for instruction v_exp_legacy_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_cvt_f16_u16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x72,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f16_u16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_cvt_f16_i16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x74,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f16_i16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_cvt_u16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x76,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_u16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_cvt_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x78,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_i16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_rcp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7a,0x02,0x7e,0x00,0x06,0x05,0x00] v_rcp_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // GFX89: v_sqrt_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7c,0x02,0x7e,0x00,0x06,0x05,0x00] v_sqrt_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_rsq_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7e,0x02,0x7e,0x00,0x06,0x05,0x00] v_rsq_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x80,0x02,0x7e,0x00,0x06,0x05,0x00] v_log_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_exp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x82,0x02,0x7e,0x00,0x06,0x05,0x00] v_exp_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_frexp_mant_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x84,0x02,0x7e,0x00,0x06,0x05,0x00] v_frexp_mant_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_frexp_exp_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x86,0x02,0x7e,0x00,0x06,0x05,0x00] v_frexp_exp_i16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_floor_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x88,0x02,0x7e,0x00,0x06,0x05,0x00] v_floor_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_ceil_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8a,0x02,0x7e,0x00,0x06,0x05,0x00] v_ceil_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_trunc_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8c,0x02,0x7e,0x00,0x06,0x05,0x00] v_trunc_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8e,0x02,0x7e,0x00,0x06,0x05,0x00] v_rndne_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_fract_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x90,0x02,0x7e,0x00,0x06,0x05,0x00] v_fract_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // GFX89: v_sin_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x92,0x02,0x7e,0x00,0x06,0x05,0x00] v_sin_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_cos_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x94,0x02,0x7e,0x00,0x06,0x05,0x00] v_cos_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 // GFX9: v_cvt_norm_i16_f16_sdwa v5, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x9a,0x0a,0x7e,0x01,0x06,0x16,0x00] -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU v_cvt_norm_i16_f16_sdwa v5, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // GFX9: v_cvt_norm_i16_f16_sdwa v5, |v1| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x9a,0x0a,0x7e,0x01,0x06,0x26,0x00] -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU v_cvt_norm_i16_f16_sdwa v5, |v1| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // GFX9: v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x9c,0x0a,0x7e,0x01,0x16,0x06,0x00] -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD // GFX9: v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x9c,0x0a,0x7e,0x01,0x06,0x05,0x00] -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 // GFX9: v_sat_pk_u8_i16_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x9e,0x0a,0x7e,0x01,0x06,0x0e,0x00] -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU v_sat_pk_u8_i16_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD //===----------------------------------------------------------------------===// @@ -451,135 +451,135 @@ v_or_b32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel // GFX89: v_xor_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x2a,0x02,0x06,0x05,0x02] v_xor_b32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_add_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3e,0x02,0x06,0x05,0x02] v_add_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // GFX89: v_sub_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x40,0x02,0x06,0x05,0x02] v_sub_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_subrev_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x42,0x02,0x06,0x05,0x02] v_subrev_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_mul_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x44,0x02,0x06,0x05,0x02] v_mul_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_add_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x4c,0x02,0x06,0x05,0x02] v_add_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_sub_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x4e,0x02,0x06,0x05,0x02] v_sub_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_subrev_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x50,0x02,0x06,0x05,0x02] v_subrev_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_mul_lo_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x52,0x02,0x06,0x05,0x02] v_mul_lo_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_lshlrev_b16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x54,0x02,0x06,0x05,0x02] v_lshlrev_b16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_lshrrev_b16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x56,0x02,0x06,0x05,0x02] v_lshrrev_b16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_ashrrev_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x58,0x02,0x06,0x05,0x02] v_ashrrev_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // GFX89: v_max_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5a,0x02,0x06,0x05,0x02] v_max_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_min_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5c,0x02,0x06,0x05,0x02] v_min_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_max_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5e,0x02,0x06,0x05,0x02] v_max_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_max_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x60,0x02,0x06,0x05,0x02] v_max_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_min_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x62,0x02,0x06,0x05,0x02] v_min_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_min_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x64,0x02,0x06,0x05,0x02] v_min_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // GFX89: v_ldexp_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x66,0x02,0x06,0x05,0x02] v_ldexp_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOGFX9: error: instruction not supported on this GPU // VI: v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02] v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOGFX9: error: instruction not supported on this GPU // VI: v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02] v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // NOGFX9: error: instruction not supported on this GPU // VI: v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02] v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: invalid operand for instruction -// NOGFX9: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported +// NOGFX9: error: instruction not supported on this GPU // VI: v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02] v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: invalid operand for instruction -// NOGFX9: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported +// NOGFX9: error: instruction not supported on this GPU // VI: v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02] v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: invalid operand for instruction -// NOGFX9: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported +// NOGFX9: error: instruction not supported on this GPU // VI: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02] v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: invalid operand for instruction -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU // GFX9: v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02] v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: invalid operand for instruction -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU // GFX9: v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02] v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: invalid operand for instruction -// NOVI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU // GFX9: v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02] v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU // GFX9: v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02] v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU // GFX9: v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02] v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU // GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02] v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 @@ -605,72 +605,72 @@ v_cndmask_b32_sdwa v5, vcc_lo, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE // Check VOPC opcodes //===----------------------------------------------------------------------===// -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmp_eq_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmp_eq_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x02,0x04] v_cmp_eq_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmp_nle_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmp_nle_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7c,0x01,0x00,0x02,0x04] v_cmp_nle_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmpx_gt_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa8,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_gt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa8,0x7c,0x01,0x00,0x02,0x04] v_cmpx_gt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmpx_nlt_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xbc,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_nlt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xbc,0x7c,0x01,0x00,0x02,0x04] v_cmpx_nlt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmp_lt_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x82,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_lt_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x82,0x7d,0x01,0x00,0x02,0x04] v_cmp_lt_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmp_t_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x8e,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_t_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x8e,0x7d,0x01,0x00,0x02,0x04] v_cmp_t_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmpx_eq_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa4,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_eq_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa4,0x7d,0x01,0x00,0x02,0x04] v_cmpx_eq_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmpx_ne_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xaa,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_ne_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xaa,0x7d,0x01,0x00,0x02,0x04] v_cmpx_ne_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmp_f_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x90,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_f_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x90,0x7d,0x01,0x00,0x02,0x04] v_cmp_f_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmp_gt_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_gt_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7d,0x01,0x00,0x02,0x04] v_cmp_gt_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmpx_le_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xb6,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_le_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xb6,0x7d,0x01,0x00,0x02,0x04] v_cmpx_le_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmpx_ne_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xba,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_ne_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xba,0x7d,0x01,0x00,0x02,0x04] v_cmpx_ne_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmp_class_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmp_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x02,0x04] v_cmp_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // VI: v_cmpx_class_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x22,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x22,0x7c,0x01,0x00,0x02,0x04] v_cmpx_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 @@ -698,7 +698,7 @@ v_mac_f32 v15, v99, v194 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:WORD_0 // NOGFX9: error: instruction not supported on this GPU v_mac_f32 v194, v13, v1 dst_sel:BYTE_0 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // VI: v_mac_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x46,0x02,0x06,0x05,0x02] // NOGFX9: error: instruction not supported on this GPU v_mac_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 @@ -717,7 +717,7 @@ v_mov_b32 v1, s2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD // GFX9: v_mov_b32_sdwa v1, exec_lo dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x7e,0x10,0x86,0x00] v_mov_b32 v1, exec_lo dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: register not available on this GPU +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: register not available on this GPU // GFX9: v_mov_b32_sdwa v1, ttmp12 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x78,0x10,0x86,0x00] v_mov_b32_sdwa v1, ttmp12 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD @@ -747,272 +747,272 @@ v_add_f32 v0, v1, tba_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src // NOGFX9: error: register not available on this GPU v_add_f32 v0, v1, tma_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa vcc, s1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x85,0x02] v_cmp_eq_f32_sdwa vcc, s1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa vcc, v1, s22 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x2c,0x84,0x7c,0x01,0x00,0x05,0x82] v_cmp_eq_f32_sdwa vcc, v1, s22 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: register not available on this GPU +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: register not available on this GPU // GFX9: v_cmp_eq_f32_sdwa ttmp[12:13], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0xf8,0x05,0x02] v_cmp_eq_f32_sdwa ttmp[12:13], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: invalid operand 
for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: instruction not supported on this GPU // NOGFX9: error: register not available on this GPU v_cmp_eq_f32_sdwa tba, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: instruction not supported on this GPU // NOGFX9: error: register not available on this GPU v_cmp_eq_f32_sdwa tma, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: register not available on this GPU +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: register not available on this GPU // GFX9: v_cmp_eq_f32_sdwa vcc, v1, ttmp15 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0xf6,0x84,0x7c,0x01,0x00,0x05,0x82] v_cmp_eq_f32_sdwa vcc, v1, ttmp15 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32_sdwa vcc, exec_lo, vcc_lo src0_sel:WORD_1 src1_sel:BYTE_2 // NOVI: error: invalid operand for instruction // GFX9: v_ceil_f16_sdwa v5, flat_scratch_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0x66,0x06,0x86,0x00] -// NOSI: error: register not available on this GPU -// NOCI: error: not a valid operand. +// NOSI: error: instruction not supported on this GPU +// NOCI: error: instruction not supported on this GPU v_ceil_f16_sdwa v5, flat_scratch_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD //===----------------------------------------------------------------------===// // Inline constants are allowed (though semantics is not clear yet) //===----------------------------------------------------------------------===// -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0x80,0x06,0x86,0x00] v_mov_b32_sdwa v5, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xc1,0x06,0x86,0x00] v_mov_b32_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xf0,0x06,0x86,0x00] v_mov_b32_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, -4.0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xf7,0x06,0x86,0x00] v_mov_b32_sdwa v5, -4.0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: not a valid operand. 
+// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xc1,0x16,0x8e,0x00] v_mov_b32_sdwa v5, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, -1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xc1,0x06,0x86,0x06] v_add_f32_sdwa v5, -1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, |-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xc1,0x16,0xa6,0x06] v_add_f32_sdwa v5, |-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, neg(-1), -|v2| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xc1,0x16,0x96,0x36] v_add_f32_sdwa v5, neg(-1), -|v2| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, -|-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xc1,0x16,0xb6,0x06] v_add_f32_sdwa v5, -|-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, 0.5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xf0,0x06,0x86,0x06] v_add_f32_sdwa v5, 0.5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, |-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xf7,0x16,0xa6,0x06] v_add_f32_sdwa v5, |-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, neg(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xf7,0x16,0x96,0x06] v_add_f32_sdwa v5, neg(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, 
-|-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xf7,0x16,0xb6,0x06] v_add_f32_sdwa v5, -|-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -4.0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0x86] v_add_f32_sdwa v5, v2, -4.0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, |-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0xa6] v_add_f32_sdwa v5, v2, |-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, neg(-4.0) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0x96] v_add_f32_sdwa v5, v2, neg(-4.0) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -|-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0xb6] v_add_f32_sdwa v5, v2, -|-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0x86] v_add_f32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, |-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0xa6] v_add_f32_sdwa v5, v2, |-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, neg(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0x96] v_add_f32_sdwa v5, v2, neg(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -|-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: 
[0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0xb6] v_add_f32_sdwa v5, v2, -|-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x26,0xf7,0x16,0x86,0x06] v_and_b32_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x26,0xf7,0x16,0x8e,0x06] v_and_b32_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x26,0x02,0x16,0x06,0x86] v_and_b32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: not a valid operand. +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x26,0x02,0x16,0x06,0x8e] v_and_b32_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0x86,0x00] v_exp_f16_sdwa v5, -1 -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, |-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0xa6,0x00] v_exp_f16_sdwa v5, |-1| -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, neg(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0x96,0x00] v_exp_f16_sdwa v5, neg(-1) -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, -|-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0xb6,0x00] v_exp_f16_sdwa v5, -|-1| -// NOSICI: error: invalid operand for instruction +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0x86,0x00] v_exp_f16_sdwa v5, 0.5 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, |0.5| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0xa6,0x00] v_exp_f16_sdwa v5, |0.5| -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, neg(0.5) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0x96,0x00] v_exp_f16_sdwa v5, neg(0.5) -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, -|0.5| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0xb6,0x00] v_exp_f16_sdwa v5, -|0.5| -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_max_i16_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_max_i16_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: not a valid operand. +// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_max_i16_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x60,0x02,0x16,0x06,0x86] v_max_i16_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU // NOVI: error: invalid operand for instruction // GFX9: v_max_i16_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x60,0x02,0x16,0x06,0x8e] v_max_i16_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], -4.0, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0x86,0x06] v_cmp_eq_f32_sdwa s[6:7], -4.0, v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], |-4.0|, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0xa6,0x06] v_cmp_eq_f32_sdwa s[6:7], |-4.0|, v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], neg(-4.0), v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0x96,0x06] v_cmp_eq_f32_sdwa s[6:7], neg(-4.0), v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], -|-4.0|, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0xb6,0x06] v_cmp_eq_f32_sdwa s[6:7], -|-4.0|, v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, -1 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0x86] v_cmp_eq_f32_sdwa s[6:7], v2, -1 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, |-1| src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0xa6] v_cmp_eq_f32_sdwa s[6:7], v2, |-1| src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, neg(-1) src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0x96] v_cmp_eq_f32_sdwa s[6:7], v2, neg(-1) src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, -|-1| src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0xb6] v_cmp_eq_f32_sdwa s[6:7], v2, -|-1| src0_sel:DWORD src1_sel:DWORD @@ -1033,7 +1033,7 @@ v_cmpx_class_f32 vcc, v1, 200 src0_sel:BYTE_2 src1_sel:WORD_0 // NOGFX89: error: invalid operand for instruction v_cmpx_class_f32 vcc, 200, v1 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: not a valid operand. 
+// NOSICI: error: sdwa variant of this instruction is not supported // NOGFX89: error: invalid operand for instruction v_mov_b32_sdwa v5, -17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD @@ -1041,17 +1041,17 @@ v_mov_b32_sdwa v5, -17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // VOPC with arbitrary SGPR destination //===----------------------------------------------------------------------===// -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: instruction not supported on this GPU // GFX9: v_cmp_eq_f32_sdwa s[2:3], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x82,0x05,0x02] v_cmp_eq_f32_sdwa s[2:3], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: instruction not supported on this GPU // GFX9: v_cmp_eq_f32_sdwa exec, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0xfe,0x05,0x02] v_cmp_eq_f32_sdwa exec, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: sdwa variant of this instruction is not supported // NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa exec, s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x02,0xfe,0x85,0x02] v_cmp_eq_f32_sdwa exec, s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 @@ -1084,8 +1084,8 @@ v_add_f32 v0, v0, v0 clamp div:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WO // Check Instructions //---------------------------------------------------------------------------// -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. 
+// NOSICI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU // GFX9: v_screen_partition_4se_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:BYTE_0 ; encoding: [0xf9,0x6e,0x0a,0x7e,0x01,0x16,0x00,0x00] v_screen_partition_4se_b32_sdwa v5, v1 src0_sel:BYTE_0 @@ -1121,38 +1121,38 @@ v_add_f32 v0, s0, v[0:1] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src // NOGFX89: error: invalid operand for instruction v_add_f32 v0, s0, v[0:3] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOGFX89: error: invalid operand for instruction v_add_f16 v1, v[2:3], v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOGFX89: error: invalid operand for instruction v_add_f16 v1, s[2:3], v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOGFX89: error: invalid operand for instruction v_add_f16 v1, v2, v[2:3] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOGFX89: error: invalid operand for instruction v_add_f16 v1, v2, s[2:3] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOVI: error: not a valid operand // NOGFX9: error: invalid operand for instruction v_add_u32 v1, v[2:3], v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOVI: error: not a valid operand // NOGFX9: error: invalid operand for instruction v_add_u32 v1, s[2:3], v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOVI: error: not a valid operand // NOGFX9: error: invalid operand for instruction v_add_u32 v1, v3, v[2:3] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand +// NOSICI: error: instruction not supported on this GPU // NOVI: error: not a valid operand // NOGFX9: error: invalid operand for instruction v_add_u32 v1, v3, s[2:3] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 diff --git a/llvm/test/MC/AMDGPU/wave32.s b/llvm/test/MC/AMDGPU/wave32.s index b9f6af4b28169..2044f6ec3b481 100644 --- a/llvm/test/MC/AMDGPU/wave32.s +++ b/llvm/test/MC/AMDGPU/wave32.s @@ -88,12 +88,12 @@ v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] v_add_co_u32_e32 v2, vcc_lo, s0, v2 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX1064-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:1: error: e32 variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: e32 variant of this instruction is not supported v_add_co_u32_e32 v2, vcc, s0, v2 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// 
GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: e32 variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: e32 variant of this instruction is not supported v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo // GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50] @@ -108,20 +108,20 @@ v_add_co_ci_u32_e32 v3, v3, v4 // GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50] v_sub_co_u32_e32 v2, vcc_lo, s0, v2 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX1064-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:1: error: e32 variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: e32 variant of this instruction is not supported v_sub_co_u32_e32 v2, vcc, s0, v2 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: e32 variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: e32 variant of this instruction is not supported v_subrev_co_u32_e32 v2, vcc_lo, s0, v2 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX1064-ERR: :[[@LINE-2]]:33: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:1: error: e32 variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: e32 variant of this instruction is not supported v_subrev_co_u32_e32 v2, vcc, s0, v2 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: e32 variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: e32 variant of this instruction is not supported v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo // GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52] @@ -148,16 +148,16 @@ v_subrev_co_ci_u32_e32 v1, 0, v1 // GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54] v_add_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: :[[@LINE-1]]:38: error: invalid operand for instruction{{$}} -// GFX1064-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported v_add_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported v_add_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: :[[@LINE-1]]:30: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:30: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: sdwa variant of this 
instruction is not supported v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] @@ -172,28 +172,28 @@ v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYT // GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] v_sub_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: :[[@LINE-1]]:38: error: invalid operand for instruction{{$}} -// GFX1064-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported v_sub_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported v_sub_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: :[[@LINE-1]]:30: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:30: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported v_subrev_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction{{$}} -// GFX1064-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported v_subrev_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported v_subrev_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: :[[@LINE-1]]:33: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:33: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] @@ -232,16 +232,16 @@ v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), 
vcc dst_sel:DWORD dst_unused:U // GFX1064: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] v_add_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:29: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:29: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: dpp variant of this instruction is not supported v_add_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:37: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:37: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: dpp variant of this instruction is not supported v_add_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:34: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:34: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: dpp variant of this instruction is not supported v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] @@ -256,16 +256,16 @@ v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_m // GFX1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] v_sub_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:29: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:29: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: dpp variant of this instruction is not supported v_sub_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:37: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:37: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: dpp variant of this instruction is not supported v_sub_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:34: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:34: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: dpp variant of this instruction is not supported v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00] @@ -276,16 +276,16 @@ v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_m // GFX1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: 
[0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00] v_subrev_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:32: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:32: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: dpp variant of this instruction is not supported v_subrev_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:40: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:40: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: dpp variant of this instruction is not supported v_subrev_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:37: error: not a valid operand.{{$}} -// GFX1064-ERR: :[[@LINE-2]]:37: error: not a valid operand.{{$}} +// GFX1032-ERR: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported +// GFX1064-ERR: :[[@LINE-2]]:1: error: dpp variant of this instruction is not supported v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/xdl-insts-err.s b/llvm/test/MC/AMDGPU/xdl-insts-err.s index d774260bf941c..7cb8c26e79073 100644 --- a/llvm/test/MC/AMDGPU/xdl-insts-err.s +++ b/llvm/test/MC/AMDGPU/xdl-insts-err.s @@ -4,37 +4,45 @@ // GFX906-ERR: error: instruction not supported on this GPU v_dot2c_f32_f16 v0, v1, v2 -// GCN-ERR: error: invalid instruction +// GFX906-ERR: error: instruction not supported on this GPU +// GFX908-ERR: error: e64 variant of this instruction is not supported v_dot2c_f32_f16_e64 v0, v1, v2 -// GCN-ERR: error: invalid instruction +// GFX906-ERR: error: instruction not supported on this GPU +// GFX908-ERR: error: sdwa variant of this instruction is not supported v_dot2c_f32_f16_sdwa v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU v_dot2c_i32_i16 v0, v1, v2 -// GCN-ERR: error: invalid instruction +// GFX906-ERR: error: instruction not supported on this GPU +// GFX908-ERR: error: e64 variant of this instruction is not supported v_dot2c_i32_i16_e64 v0, v1, v2 -// GCN-ERR: error: invalid instruction +// GFX906-ERR: error: instruction not supported on this GPU +// GFX908-ERR: error: sdwa variant of this instruction is not supported v_dot2c_i32_i16_sdwa v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU v_dot4c_i32_i8 v0, v1, v2 -// GCN-ERR: error: invalid instruction +// GFX906-ERR: error: instruction not supported on this GPU +// GFX908-ERR: error: e64 variant of this instruction is not supported v_dot4c_i32_i8_e64 v0, v1, v2 -// GCN-ERR: error: invalid instruction +// GFX906-ERR: error: instruction not supported on this GPU +// GFX908-ERR: error: sdwa variant of this instruction is not supported v_dot4c_i32_i8_sdwa v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU v_dot8c_i32_i4 v0, v1, v2 -// GCN-ERR: error: invalid instruction +// GFX906-ERR: error: instruction not supported on this GPU +// GFX908-ERR: error: e64 variant of this instruction is not supported v_dot8c_i32_i4_e64 v0, v1, v2 -// GCN-ERR: error: invalid instruction +// GFX906-ERR: 
error: instruction not supported on this GPU +// GFX908-ERR: error: sdwa variant of this instruction is not supported v_dot8c_i32_i4_sdwa v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU From 68e002e1819f1598fc6815226a353ad2f04cd509 Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Wed, 23 Sep 2020 11:43:27 +0100 Subject: [PATCH 137/321] [ARM] Fold select_cc(vecreduce_[u|s][min|max], x) into VMINV or VMAXV This folds a select_cc or select(set_cc) of a max or min vector reduction with a scalar value into a VMAXV or VMINV. Differential Revision: https://reviews.llvm.org/D87836 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 113 +++ llvm/lib/Target/ARM/ARMISelLowering.h | 4 + llvm/lib/Target/ARM/ARMInstrMVE.td | 34 + .../CodeGen/Thumb2/mve-vecreduce-loops.ll | 105 ++- .../CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll | 647 ++++++++++++++++++ llvm/test/CodeGen/Thumb2/mve-vmaxv.ll | 77 +-- 6 files changed, 859 insertions(+), 121 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 798ecf2487637..a818b66dd96fa 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -987,6 +987,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SMAX); setTargetDAGCombine(ISD::UMAX); setTargetDAGCombine(ISD::FP_EXTEND); + setTargetDAGCombine(ISD::SELECT); + setTargetDAGCombine(ISD::SELECT_CC); } if (!Subtarget->hasFP64()) { @@ -1740,6 +1742,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu"; case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps"; case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu"; + case ARMISD::VMINVu: return "ARMISD::VMINVu"; + case ARMISD::VMINVs: return "ARMISD::VMINVs"; + case ARMISD::VMAXVu: return "ARMISD::VMAXVu"; + case ARMISD::VMAXVs: return "ARMISD::VMAXVs"; case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; @@ -12093,6 +12099,111 @@ static SDValue PerformAddeSubeCombine(SDNode *N, return SDValue(); } +static SDValue PerformSELECTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasMVEIntegerOps()) + return SDValue(); + + SDLoc dl(N); + SDValue SetCC; + SDValue LHS; + SDValue RHS; + ISD::CondCode CC; + SDValue TrueVal; + SDValue FalseVal; + + if (N->getOpcode() == ISD::SELECT && + N->getOperand(0)->getOpcode() == ISD::SETCC) { + SetCC = N->getOperand(0); + LHS = SetCC->getOperand(0); + RHS = SetCC->getOperand(1); + CC = cast(SetCC->getOperand(2))->get(); + TrueVal = N->getOperand(1); + FalseVal = N->getOperand(2); + } else if (N->getOpcode() == ISD::SELECT_CC) { + LHS = N->getOperand(0); + RHS = N->getOperand(1); + CC = cast(N->getOperand(4))->get(); + TrueVal = N->getOperand(2); + FalseVal = N->getOperand(3); + } else { + return SDValue(); + } + + unsigned int Opcode = 0; + if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN || + FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) && + (CC == ISD::SETULT || CC == ISD::SETUGT)) { + Opcode = ARMISD::VMINVu; + if (CC == ISD::SETUGT) + std::swap(TrueVal, FalseVal); + } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN || + FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) && + (CC == ISD::SETLT || CC == ISD::SETGT)) { + Opcode = ARMISD::VMINVs; + if (CC == ISD::SETGT) + std::swap(TrueVal, FalseVal); + } else 
if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX || + FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) && + (CC == ISD::SETUGT || CC == ISD::SETULT)) { + Opcode = ARMISD::VMAXVu; + if (CC == ISD::SETULT) + std::swap(TrueVal, FalseVal); + } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX || + FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) && + (CC == ISD::SETGT || CC == ISD::SETLT)) { + Opcode = ARMISD::VMAXVs; + if (CC == ISD::SETLT) + std::swap(TrueVal, FalseVal); + } else + return SDValue(); + + // Normalise to the right hand side being the vector reduction + switch (TrueVal->getOpcode()) { + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_SMAX: + std::swap(LHS, RHS); + std::swap(TrueVal, FalseVal); + break; + } + + EVT VectorType = FalseVal->getOperand(0).getValueType(); + + if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 && + VectorType != MVT::v4i32) + return SDValue(); + + EVT VectorScalarType = VectorType.getVectorElementType(); + + // The values being selected must also be the ones being compared + if (TrueVal != LHS || FalseVal != RHS) + return SDValue(); + + EVT LeftType = LHS->getValueType(0); + EVT RightType = RHS->getValueType(0); + + // The types must match the reduced type too + if (LeftType != VectorScalarType || RightType != VectorScalarType) + return SDValue(); + + // Legalise the scalar to an i32 + if (VectorScalarType != MVT::i32) + LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); + + // Generate the reduction as an i32 for legalisation purposes + auto Reduction = + DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0)); + + // The result isn't actually an i32 so truncate it back to its original type + if (VectorScalarType != MVT::i32) + Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction); + + return Reduction; +} + static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -16049,6 +16160,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::SELECT_CC: + case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget); case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget); case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget); case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index f5bb097062aff..90cbf1eea0481 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -241,6 +241,10 @@ class VectorType; VMLALVAu, // provided as low and high halves VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask VMLALVApu, + VMINVu, // Find minimum unsigned value of a vector and register + VMINVs, // Find minimum signed value of a vector and register + VMAXVu, // Find maximum unsigned value of a vector and register + VMAXVs, // Find maximum signed value of a vector and register SMULWB, // Signed multiply word by half word, bottom SMULWT, // Signed multiply word by half word, top diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index d9e9cf1176fc7..f7f403503dc7f 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -944,6 +944,14 @@ multiclass MVE_VMINMAXV_ty { defm u32: MVE_VMINMAXV_p; } +def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer 
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2> +]>; +def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>; +def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>; +def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>; +def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>; + defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">; defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">; @@ -974,6 +982,32 @@ let Predicates = [HasMVEInt] in { def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))), (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>; + def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMINVu8 $x, $src))>; + def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMINVu16 $x, $src))>; + def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMINVu32 $x, $src))>; + def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMINVs8 $x, $src))>; + def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMINVs16 $x, $src))>; + def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMINVs32 $x, $src))>; + + def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVu8 $x, $src))>; + def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVu16 $x, $src))>; + def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVu32 $x, $src))>; + def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVs8 $x, $src))>; + def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVs16 $x, $src))>; + def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVs32 $x, $src))>; + } multiclass MVE_VMINMAXAV_ty { diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll index 382c32dbe2bf5..286277945eb65 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll @@ -790,16 +790,16 @@ for.cond.cleanup: ; preds = %for.body, %middle.b define i32 @smin_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-LABEL: smin_i32_inloop: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cmp r1, #1 ; CHECK-NEXT: blt .LBB8_3 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mvn r0, #-2147483648 ; CHECK-NEXT: cmp r1, #4 ; CHECK-NEXT: bhs .LBB8_4 ; CHECK-NEXT: @ %bb.2: +; CHECK-NEXT: mvn r0, #-2147483648 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: b .LBB8_7 ; CHECK-NEXT: .LBB8_3: @@ -808,22 +808,20 @@ define i32 @smin_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: .LBB8_4: @ %vector.ph ; CHECK-NEXT: bic r3, r1, #3 ; CHECK-NEXT: movs r2, #1 -; CHECK-NEXT: sub.w lr, r3, #4 -; CHECK-NEXT: add.w lr, r2, lr, lsr #2 +; CHECK-NEXT: subs r0, r3, #4 +; CHECK-NEXT: add.w lr, r2, r0, lsr #2 +; CHECK-NEXT: mvn r0, #-2147483648 ; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB8_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r2], #16 -; CHECK-NEXT: mvn r4, #-2147483648 -; CHECK-NEXT: vminv.s32 r4, q0 -; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: csel r0, r0, r4, lt +; CHECK-NEXT: vminv.s32 r0, q0 ; CHECK-NEXT: le lr, .LBB8_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block ; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, pc} +; CHECK-NEXT: 
popeq {r7, pc} ; CHECK-NEXT: .LBB8_7: @ %for.body.preheader1 ; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r1, r12, r3, lsl #2 @@ -835,7 +833,7 @@ define i32 @smin_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: csel r0, r0, r2, lt ; CHECK-NEXT: le lr, .LBB8_8 ; CHECK-NEXT: .LBB8_9: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r7, pc} entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup @@ -988,16 +986,16 @@ for.cond.cleanup: ; preds = %for.body, %middle.b define i32 @smax_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-LABEL: smax_i32_inloop: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cmp r1, #1 ; CHECK-NEXT: blt .LBB10_3 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov.w r0, #-2147483648 ; CHECK-NEXT: cmp r1, #4 ; CHECK-NEXT: bhs .LBB10_4 ; CHECK-NEXT: @ %bb.2: +; CHECK-NEXT: mov.w r0, #-2147483648 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: b .LBB10_7 ; CHECK-NEXT: .LBB10_3: @@ -1006,22 +1004,20 @@ define i32 @smax_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: .LBB10_4: @ %vector.ph ; CHECK-NEXT: bic r3, r1, #3 ; CHECK-NEXT: movs r2, #1 -; CHECK-NEXT: sub.w lr, r3, #4 -; CHECK-NEXT: add.w lr, r2, lr, lsr #2 +; CHECK-NEXT: subs r0, r3, #4 +; CHECK-NEXT: add.w lr, r2, r0, lsr #2 +; CHECK-NEXT: mov.w r0, #-2147483648 ; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB10_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r2], #16 -; CHECK-NEXT: mov.w r4, #-2147483648 -; CHECK-NEXT: vmaxv.s32 r4, q0 -; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: csel r0, r0, r4, gt +; CHECK-NEXT: vmaxv.s32 r0, q0 ; CHECK-NEXT: le lr, .LBB10_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block ; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, pc} +; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB10_7: @ %for.body.preheader1 ; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r1, r12, r3, lsl #2 @@ -1033,7 +1029,7 @@ define i32 @smax_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: csel r0, r0, r2, gt ; CHECK-NEXT: le lr, .LBB10_8 ; CHECK-NEXT: .LBB10_9: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r7, pc} entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup @@ -1186,16 +1182,16 @@ for.cond.cleanup: ; preds = %for.body, %middle.b define i32 @umin_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-LABEL: umin_i32_inloop: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cmp r1, #1 ; CHECK-NEXT: blt .LBB12_3 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov.w r0, #-1 ; CHECK-NEXT: cmp r1, #4 ; CHECK-NEXT: bhs .LBB12_4 ; CHECK-NEXT: @ %bb.2: +; CHECK-NEXT: mov.w r0, #-1 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: b .LBB12_7 ; CHECK-NEXT: .LBB12_3: @@ -1204,22 +1200,20 @@ define i32 @umin_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: .LBB12_4: @ %vector.ph ; CHECK-NEXT: bic r3, r1, #3 ; CHECK-NEXT: movs r2, #1 -; CHECK-NEXT: sub.w lr, r3, #4 -; CHECK-NEXT: add.w lr, r2, lr, lsr #2 +; CHECK-NEXT: subs r0, r3, #4 +; CHECK-NEXT: add.w lr, r2, r0, lsr #2 +; CHECK-NEXT: mov.w r0, #-1 ; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: 
.LBB12_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r2], #16 -; CHECK-NEXT: mov.w r4, #-1 -; CHECK-NEXT: vminv.u32 r4, q0 -; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: csel r0, r0, r4, lo +; CHECK-NEXT: vminv.u32 r0, q0 ; CHECK-NEXT: le lr, .LBB12_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block ; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, pc} +; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB12_7: @ %for.body.preheader1 ; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r1, r12, r3, lsl #2 @@ -1231,7 +1225,7 @@ define i32 @umin_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: csel r0, r0, r2, hi ; CHECK-NEXT: le lr, .LBB12_8 ; CHECK-NEXT: .LBB12_9: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r7, pc} entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup @@ -1384,17 +1378,22 @@ for.cond.cleanup: ; preds = %for.body, %middle.b define i32 @umax_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-LABEL: umax_i32_inloop: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cmp r1, #1 -; CHECK-NEXT: blt .LBB14_8 +; CHECK-NEXT: blt .LBB14_3 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: cmp r1, #4 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: blo .LBB14_5 -; CHECK-NEXT: @ %bb.2: @ %vector.ph +; CHECK-NEXT: bhs .LBB14_4 +; CHECK-NEXT: @ %bb.2: +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b .LBB14_7 +; CHECK-NEXT: .LBB14_3: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b .LBB14_9 +; CHECK-NEXT: .LBB14_4: @ %vector.ph ; CHECK-NEXT: bic r3, r1, #3 ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: subs r0, r3, #4 @@ -1402,33 +1401,27 @@ define i32 @umax_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB14_3: @ %vector.body +; CHECK-NEXT: .LBB14_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r2], #16 -; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: vmaxv.u32 r4, q0 -; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: csel r0, r0, r4, hi -; CHECK-NEXT: le lr, .LBB14_3 -; CHECK-NEXT: @ %bb.4: @ %middle.block +; CHECK-NEXT: vmaxv.u32 r0, q0 +; CHECK-NEXT: le lr, .LBB14_5 +; CHECK-NEXT: @ %bb.6: @ %middle.block ; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: .LBB14_5: @ %for.body.preheader1 +; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB14_7: @ %for.body.preheader1 ; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r1, r12, r3, lsl #2 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB14_6: @ %for.body +; CHECK-NEXT: .LBB14_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r2, [r1], #4 ; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: csel r0, r0, r2, hi -; CHECK-NEXT: le lr, .LBB14_6 -; CHECK-NEXT: @ %bb.7: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} -; CHECK-NEXT: .LBB14_8: -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: le lr, .LBB14_8 +; CHECK-NEXT: .LBB14_9: @ %for.cond.cleanup +; CHECK-NEXT: pop {r7, pc} entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll new file mode 100644 index 0000000000000..13b831efabc57 
--- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll @@ -0,0 +1,647 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s + +define arm_aapcs_vfpcc zeroext i8 @uminv16i8(<16 x i8> %vec, i8 zeroext %min) { +; CHECK-LABEL: uminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) + %cmp = icmp ult i8 %x, %min + %1 = select i1 %cmp, i8 %x, i8 %min + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @uminv8i16(<8 x i16> %vec, i16 zeroext %min) { +; CHECK-LABEL: uminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) + %cmp = icmp ult i16 %x, %min + %1 = select i1 %cmp, i16 %x, i16 %min + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @uminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: uminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) + %cmp = icmp ult i32 %x, %min + %1 = select i1 %cmp, i32 %x, i32 %min + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @sminv16i8(<16 x i8> %vec, i8 signext %min) { +; CHECK-LABEL: sminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) + %cmp = icmp slt i8 %x, %min + %1 = select i1 %cmp, i8 %x, i8 %min + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @sminv8i16(<8 x i16> %vec, i16 signext %min) { +; CHECK-LABEL: sminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) + %cmp = icmp slt i16 %x, %min + %1 = select i1 %cmp, i16 %x, i16 %min + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @sminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: sminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) + %cmp = icmp slt i32 %x, %min + %1 = select i1 %cmp, i32 %x, i32 %min + ret i32 %1 +} + +define arm_aapcs_vfpcc zeroext i8 @umaxv16i8(<16 x i8> %vec, i8 zeroext %max) { +; CHECK-LABEL: umaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) + %cmp = icmp ugt i8 %x, %max + %1 = select i1 %cmp, i8 %x, i8 %max + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @umaxv8i16(<8 x i16> %vec, i16 zeroext %max) { +; CHECK-LABEL: umaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %cmp = icmp ugt i16 %x, %max + %1 = select i1 %cmp, i16 %x, i16 %max + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @umaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: umaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) + %cmp = icmp ugt i32 %x, %max + %1 = select i1 %cmp, i32 %x, i32 %max + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @smaxv16i8(<16 x i8> %vec, i8 signext %max) { 
+; CHECK-LABEL: smaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %cmp = icmp sgt i8 %x, %max + %1 = select i1 %cmp, i8 %x, i8 %max + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @smaxv8i16(<8 x i16> %vec, i16 signext %max) { +; CHECK-LABEL: smaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %cmp = icmp sgt i16 %x, %max + %1 = select i1 %cmp, i16 %x, i16 %max + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @smaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: smaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) + %cmp = icmp sgt i32 %x, %max + %1 = select i1 %cmp, i32 %x, i32 %max + ret i32 %1 +} + +define arm_aapcs_vfpcc zeroext i8 @commute_uminv16i8(<16 x i8> %vec, i8 zeroext %min) { +; CHECK-LABEL: commute_uminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) + %cmp = icmp ult i8 %min, %x + %1 = select i1 %cmp, i8 %min, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @commute_uminv8i16(<8 x i16> %vec, i16 zeroext %min) { +; CHECK-LABEL: commute_uminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) + %cmp = icmp ult i16 %min, %x + %1 = select i1 %cmp, i16 %min, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @commute_uminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: commute_uminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) + %cmp = icmp ult i32 %min, %x + %1 = select i1 %cmp, i32 %min, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @commute_sminv16i8(<16 x i8> %vec, i8 signext %min) { +; CHECK-LABEL: commute_sminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) + %cmp = icmp slt i8 %min, %x + %1 = select i1 %cmp, i8 %min, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @commute_sminv8i16(<8 x i16> %vec, i16 signext %min) { +; CHECK-LABEL: commute_sminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) + %cmp = icmp slt i16 %min, %x + %1 = select i1 %cmp, i16 %min, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @commute_sminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: commute_sminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) + %cmp = icmp slt i32 %min, %x + %1 = select i1 %cmp, i32 %min, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc zeroext i8 @commute_umaxv16i8(<16 x i8> %vec, i8 zeroext %max) { +; CHECK-LABEL: commute_umaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) + %cmp = icmp ugt i8 %max, %x 
+ %1 = select i1 %cmp, i8 %max, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @commute_umaxv8i16(<8 x i16> %vec, i16 zeroext %max) { +; CHECK-LABEL: commute_umaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %cmp = icmp ugt i16 %max, %x + %1 = select i1 %cmp, i16 %max, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @commute_umaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: commute_umaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) + %cmp = icmp ugt i32 %max, %x + %1 = select i1 %cmp, i32 %max, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @commute_smaxv16i8(<16 x i8> %vec, i8 signext %max) { +; CHECK-LABEL: commute_smaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %cmp = icmp sgt i8 %max, %x + %1 = select i1 %cmp, i8 %max, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @commute_smaxv8i16(<8 x i16> %vec, i16 signext %max) { +; CHECK-LABEL: commute_smaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %cmp = icmp sgt i16 %max, %x + %1 = select i1 %cmp, i16 %max, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @commute_smaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: commute_smaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) + %cmp = icmp sgt i32 %max, %x + %1 = select i1 %cmp, i32 %max, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @mismatch_smaxv16i8(<16 x i8> %vec, i8 signext %max) { +; CHECK-LABEL: mismatch_smaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: mvn r1, #127 +; CHECK-NEXT: vmaxv.s8 r1, q0 +; CHECK-NEXT: sxtb r2, r1 +; CHECK-NEXT: cmp r2, r0 +; CHECK-NEXT: csel r0, r0, r1, gt +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %cmp = icmp sgt i8 %x, %max + %1 = select i1 %cmp, i8 %max, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i8 @mismatch2_smaxv16i8(<16 x i8> %vec, i8 signext %max) { +; CHECK-LABEL: mismatch2_smaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: mvn r1, #127 +; CHECK-NEXT: vmaxv.s8 r1, q0 +; CHECK-NEXT: sxtb r2, r1 +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: csel r0, r1, r0, gt +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %cmp = icmp sgt i8 %max, %x + %1 = select i1 %cmp, i8 %x, i8 %max + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i8 @inverted_uminv16i8(<16 x i8> %vec, i8 zeroext %min) { +; CHECK-LABEL: inverted_uminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) + %cmp = icmp ugt i8 %x, %min + %1 = select i1 %cmp, i8 %min, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @inverted_uminv8i16(<8 x i16> %vec, i16 zeroext %min) { +; CHECK-LABEL: inverted_uminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 
@llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) + %cmp = icmp ugt i16 %x, %min + %1 = select i1 %cmp, i16 %min, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @inverted_uminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: inverted_uminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) + %cmp = icmp ugt i32 %x, %min + %1 = select i1 %cmp, i32 %min, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @inverted_sminv16i8(<16 x i8> %vec, i8 signext %min) { +; CHECK-LABEL: inverted_sminv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) + %cmp = icmp sgt i8 %x, %min + %1 = select i1 %cmp, i8 %min, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @inverted_sminv8i16(<8 x i16> %vec, i16 signext %min) { +; CHECK-LABEL: inverted_sminv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) + %cmp = icmp sgt i16 %x, %min + %1 = select i1 %cmp, i16 %min, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @inverted_sminv4i32(<4 x i32> %vec, i32 %min) { +; CHECK-LABEL: inverted_sminv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vminv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) + %cmp = icmp sgt i32 %x, %min + %1 = select i1 %cmp, i32 %min, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc zeroext i8 @inverted_umaxv16i8(<16 x i8> %vec, i8 zeroext %max) { +; CHECK-LABEL: inverted_umaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u8 r0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) + %cmp = icmp ult i8 %x, %max + %1 = select i1 %cmp, i8 %max, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc zeroext i16 @inverted_umaxv8i16(<8 x i16> %vec, i16 zeroext %max) { +; CHECK-LABEL: inverted_umaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u16 r0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %cmp = icmp ult i16 %x, %max + %1 = select i1 %cmp, i16 %max, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 @inverted_umaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: inverted_umaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.u32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) + %cmp = icmp ult i32 %x, %max + %1 = select i1 %cmp, i32 %max, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i8 @inverted_smaxv16i8(<16 x i8> %vec, i8 signext %max) { +; CHECK-LABEL: inverted_smaxv16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s8 r0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: bx lr + %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %cmp = icmp slt i8 %x, %max + %1 = select i1 %cmp, i8 %max, i8 %x + ret i8 %1 +} + +define arm_aapcs_vfpcc signext i16 @inverted_smaxv8i16(<8 x i16> %vec, i16 signext %max) { +; CHECK-LABEL: inverted_smaxv8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s16 r0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %cmp = icmp slt i16 %x, %max + %1 = select i1 %cmp, i16 %max, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i32 
@inverted_smaxv4i32(<4 x i32> %vec, i32 %max) { +; CHECK-LABEL: inverted_smaxv4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmaxv.s32 r0, q0 +; CHECK-NEXT: bx lr + %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) + %cmp = icmp slt i32 %x, %max + %1 = select i1 %cmp, i32 %max, i32 %x + ret i32 %1 +} + +define arm_aapcs_vfpcc signext i16 @trunc_and_sext(<8 x i16> %vec, i32 %max) #1 { +; CHECK-LABEL: trunc_and_sext: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #32768 +; CHECK-NEXT: movt r1, #65535 +; CHECK-NEXT: vmaxv.s16 r1, q0 +; CHECK-NEXT: sxth r2, r1 +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: csel r0, r0, r1, gt +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %xs = sext i16 %x to i32 + %cmp = icmp sgt i32 %max, %xs + %mt = trunc i32 %max to i16 + %1 = select i1 %cmp, i16 %mt, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc signext i16 @trunc_and_zext(<8 x i16> %vec, i32 %max) #1 { +; CHECK-LABEL: trunc_and_zext: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: vmaxv.u16 r1, q0 +; CHECK-NEXT: uxth r2, r1 +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: csel r0, r0, r1, gt +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr + %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %xs = zext i16 %x to i32 + %cmp = icmp sgt i32 %max, %xs + %mt = trunc i32 %max to i16 + %1 = select i1 %cmp, i16 %mt, i16 %x + ret i16 %1 +} + +define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) { +; CHECK-LABEL: uminv2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r12, s3 +; CHECK-NEXT: vmov lr, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r4, r2, r3, lo +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: csel r2, r2, r3, lo +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r5, r2, r4, eq +; CHECK-NEXT: csel r3, lr, r12, lo +; CHECK-NEXT: subs r2, r5, r0 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r2, r3, r1 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r5, r0, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: pop {r4, r5, r7, pc} + %x = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec) + %cmp = icmp ult i64 %x, %min + %1 = select i1 %cmp, i64 %x, i64 %min + ret i64 %1 +} + +define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) { +; CHECK-LABEL: sminv2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r12, s3 +; CHECK-NEXT: vmov lr, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r4, r2, r3, lt +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: csel r2, r2, r3, lo +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r5, r2, r4, eq +; CHECK-NEXT: csel r3, lr, r12, lt +; CHECK-NEXT: subs r2, r5, r0 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r2, r3, r1 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r5, r0, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: pop {r4, r5, r7, pc} + %x = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec) + %cmp = icmp slt i64 %x, %min + %1 = select i1 %cmp, i64 %x, i64 %min + ret i64 %1 +} + +define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) { +; CHECK-LABEL: umaxv2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, 
lr} +; CHECK-NEXT: vmov r12, s3 +; CHECK-NEXT: vmov lr, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r4, r2, r3, hi +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: csel r2, r2, r3, hi +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r5, r2, r4, eq +; CHECK-NEXT: csel r3, lr, r12, hi +; CHECK-NEXT: subs r2, r0, r5 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r2, r1, r3 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r5, r0, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: pop {r4, r5, r7, pc} + %x = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec) + %cmp = icmp ugt i64 %x, %max + %1 = select i1 %cmp, i64 %x, i64 %max + ret i64 %1 +} + +define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) { +; CHECK-LABEL: smaxv2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r12, s3 +; CHECK-NEXT: vmov lr, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r4, r2, r3, gt +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: csel r2, r2, r3, hi +; CHECK-NEXT: cmp lr, r12 +; CHECK-NEXT: csel r5, r2, r4, eq +; CHECK-NEXT: csel r3, lr, r12, gt +; CHECK-NEXT: subs r2, r0, r5 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r2, r1, r3 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r5, r0, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: pop {r4, r5, r7, pc} + %x = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec) + %cmp = icmp sgt i64 %x, %max + %1 = select i1 %cmp, i64 %x, i64 %max + ret i64 %1 +} + +declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) + +declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) + +declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) + +declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>) + +declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) + +declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) + +declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) + +declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>) + +declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>) + +declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>) + +declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) + +declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>) + +declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>) + +declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>) + +declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) + +declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>) diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll index eca5f44904a16..80c8ae65e4b7f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll @@ -140,12 +140,7 @@ define arm_aapcs_vfpcc i32 @vminv_u_v4i32(<4 x i32> %s1) { define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK-LABEL: vmaxv_s_v16i8_i8: ; CHECK: @ %bb.0: -; CHECK-NEXT: mvn r1, #127 -; CHECK-NEXT: sxtb r3, r0 -; CHECK-NEXT: vmaxv.s8 r1, q0 -; CHECK-NEXT: sxtb r2, r1 -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r0, r1, r0, gt +; CHECK-NEXT: vmaxv.s8 r0, q0 ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> 
%s1) %c = icmp sgt i8 %r, %s2 @@ -172,13 +167,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) { define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK-LABEL: vmaxv_s_v8i16_i16: ; CHECK: @ %bb.0: -; CHECK-NEXT: movw r1, #32768 -; CHECK-NEXT: sxth r3, r0 -; CHECK-NEXT: movt r1, #65535 -; CHECK-NEXT: vmaxv.s16 r1, q0 -; CHECK-NEXT: sxth r2, r1 -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r0, r1, r0, gt +; CHECK-NEXT: vmaxv.s16 r0, q0 ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1) %c = icmp sgt i16 %r, %s2 @@ -206,10 +195,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) { define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK-LABEL: vmaxv_s_v4i32_i32: ; CHECK: @ %bb.0: -; CHECK-NEXT: mov.w r1, #-2147483648 -; CHECK-NEXT: vmaxv.s32 r1, q0 -; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: csel r0, r1, r0, gt +; CHECK-NEXT: vmaxv.s32 r0, q0 ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %s1) %c = icmp sgt i32 %r, %s2 @@ -220,12 +206,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) { define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK-LABEL: vmaxv_u_v16i8_i8: ; CHECK: @ %bb.0: -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: uxtb r3, r0 -; CHECK-NEXT: vmaxv.u8 r1, q0 -; CHECK-NEXT: uxtb r2, r1 -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r0, r1, r0, hi +; CHECK-NEXT: vmaxv.u8 r0, q0 ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1) %c = icmp ugt i8 %r, %s2 @@ -252,12 +233,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) { define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK-LABEL: vmaxv_u_v8i16_i16: ; CHECK: @ %bb.0: -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: uxth r3, r0 -; CHECK-NEXT: vmaxv.u16 r1, q0 -; CHECK-NEXT: uxth r2, r1 -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r0, r1, r0, hi +; CHECK-NEXT: vmaxv.u16 r0, q0 ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1) %c = icmp ugt i16 %r, %s2 @@ -284,10 +260,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) { define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK-LABEL: vmaxv_u_v4i32_i32: ; CHECK: @ %bb.0: -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vmaxv.u32 r1, q0 -; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: csel r0, r1, r0, hi +; CHECK-NEXT: vmaxv.u32 r0, q0 ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %s1) %c = icmp ugt i32 %r, %s2 @@ -298,12 +271,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) { define arm_aapcs_vfpcc i8 @vminv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK-LABEL: vminv_s_v16i8_i8: ; CHECK: @ %bb.0: -; CHECK-NEXT: movs r1, #127 -; CHECK-NEXT: sxtb r3, r0 -; CHECK-NEXT: vminv.s8 r1, q0 -; CHECK-NEXT: sxtb r2, r1 -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r0, r1, r0, lt +; CHECK-NEXT: vminv.s8 r0, q0 ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1) %c = icmp slt i8 %r, %s2 @@ -330,12 +298,7 @@ define arm_aapcs_vfpcc i32 @vminv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) { define arm_aapcs_vfpcc i16 @vminv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK-LABEL: vminv_s_v8i16_i16: ; CHECK: @ %bb.0: -; CHECK-NEXT: movw r1, #32767 -; CHECK-NEXT: sxth r3, r0 -; CHECK-NEXT: vminv.s16 r1, q0 -; CHECK-NEXT: sxth r2, r1 -; CHECK-NEXT: 
cmp r2, r3 -; CHECK-NEXT: csel r0, r1, r0, lt +; CHECK-NEXT: vminv.s16 r0, q0 ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1) %c = icmp slt i16 %r, %s2 @@ -362,10 +325,7 @@ define arm_aapcs_vfpcc i32 @vminv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) { define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK-LABEL: vminv_s_v4i32_i32: ; CHECK: @ %bb.0: -; CHECK-NEXT: mvn r1, #-2147483648 -; CHECK-NEXT: vminv.s32 r1, q0 -; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: csel r0, r1, r0, lt +; CHECK-NEXT: vminv.s32 r0, q0 ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %s1) %c = icmp slt i32 %r, %s2 @@ -376,12 +336,7 @@ define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) { define arm_aapcs_vfpcc i8 @vminv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK-LABEL: vminv_u_v16i8_i8: ; CHECK: @ %bb.0: -; CHECK-NEXT: movs r1, #255 -; CHECK-NEXT: uxtb r3, r0 -; CHECK-NEXT: vminv.u8 r1, q0 -; CHECK-NEXT: uxtb r2, r1 -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r0, r1, r0, lo +; CHECK-NEXT: vminv.u8 r0, q0 ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1) %c = icmp ult i8 %r, %s2 @@ -408,12 +363,7 @@ define arm_aapcs_vfpcc i32 @vminv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) { define arm_aapcs_vfpcc i16 @vminv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK-LABEL: vminv_u_v8i16_i16: ; CHECK: @ %bb.0: -; CHECK-NEXT: movw r1, #65535 -; CHECK-NEXT: uxth r3, r0 -; CHECK-NEXT: vminv.u16 r1, q0 -; CHECK-NEXT: uxth r2, r1 -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: csel r0, r1, r0, lo +; CHECK-NEXT: vminv.u16 r0, q0 ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1) %c = icmp ult i16 %r, %s2 @@ -440,10 +390,7 @@ define arm_aapcs_vfpcc i32 @vminv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) { define arm_aapcs_vfpcc i32 @vminv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK-LABEL: vminv_u_v4i32_i32: ; CHECK: @ %bb.0: -; CHECK-NEXT: mov.w r1, #-1 -; CHECK-NEXT: vminv.u32 r1, q0 -; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: csel r0, r1, r0, lo +; CHECK-NEXT: vminv.u32 r0, q0 ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %s1) %c = icmp ult i32 %r, %s2 From 8a3cbb1535a92dcc0ac3bd8fc64216a465b8506a Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 6 Oct 2020 15:46:40 +0200 Subject: [PATCH 138/321] [clangd] Add basic keyword-name-validation in rename. Differential Revision: https://reviews.llvm.org/D88875 --- clang-tools-extra/clangd/ClangdServer.cpp | 6 +++--- clang-tools-extra/clangd/ClangdServer.h | 3 --- clang-tools-extra/clangd/refactor/Rename.cpp | 7 +++++++ clang-tools-extra/clangd/unittests/RenameTests.cpp | 8 +++++++- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 0840155fc8f96..d38e115a6796b 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -412,9 +412,9 @@ void ClangdServer::prepareRename(PathRef File, Position Pos, // - for cross-file rename, we deliberately pass a nullptr index to save // the cost, thus the result may be incomplete as it only contains // main-file occurrences; - auto Results = clangd::rename({Pos, /*NewName*/ "", InpAST->AST, File, - RenameOpts.AllowCrossFile ? nullptr : Index, - RenameOpts}); + auto Results = clangd::rename( + {Pos, /*NewName=*/"__clangd_rename_dummy", InpAST->AST, File, + RenameOpts.AllowCrossFile ? 
nullptr : Index, RenameOpts}); if (!Results) { // LSP says to return null on failure, but that will result in a generic // failure message. If we send an LSP error response, clients can surface diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index a9d46fa5278fe..d03f500697463 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -273,9 +273,6 @@ class ClangdServer { StringRef TriggerText, Callback> CB); /// Test the validity of a rename operation. - /// - /// The returned result describes edits in the main-file only (all - /// occurrences of the renamed symbol are simply deleted. void prepareRename(PathRef File, Position Pos, const RenameOptions &RenameOpts, Callback CB); diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index 9de3302564fd5..e072853dac9f1 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -120,6 +120,9 @@ enum class ReasonToReject { UsedOutsideFile, // for within-file rename only. UnsupportedSymbol, AmbiguousSymbol, + + // name validation. + RenameToKeywords, }; llvm::Optional renameable(const NamedDecl &RenameDecl, @@ -208,6 +211,8 @@ llvm::Error makeError(ReasonToReject Reason) { return "symbol is not a supported kind (e.g. namespace, macro)"; case ReasonToReject::AmbiguousSymbol: return "there are multiple symbols at the given location"; + case ReasonToReject::RenameToKeywords: + return "the chosen name is a keyword"; } llvm_unreachable("unhandled reason kind"); }; @@ -471,6 +476,8 @@ llvm::Expected rename(const RenameInputs &RInputs) { return makeError(ReasonToReject::NoSymbolFound); if (DeclsUnderCursor.size() > 1) return makeError(ReasonToReject::AmbiguousSymbol); + if (isKeyword(RInputs.NewName, AST.getLangOpts())) + return makeError(ReasonToReject::RenameToKeywords); const auto &RenameDecl = llvm::cast(*(*DeclsUnderCursor.begin())->getCanonicalDecl()); diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index cc2454e9d04e8..d925dfa36f500 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -516,6 +516,7 @@ TEST(RenameTest, Renameable) { const char* ErrorMessage; // null if no error bool IsHeaderFile; const SymbolIndex *Index; + llvm::StringRef NewName = "DummyName"; }; TestTU OtherFile = TestTU::withCode("Outside s; auto ss = &foo;"); const char *CommonHeader = R"cpp( @@ -542,6 +543,11 @@ TEST(RenameTest, Renameable) { )cpp", nullptr, HeaderFile, Index}, + {R"cpp( + void ^f(); + )cpp", + "keyword", HeaderFile, Index, "return"}, + {R"cpp(// disallow -- symbol is indexable and has other refs in index. void f() { Out^side s; @@ -639,7 +645,7 @@ TEST(RenameTest, Renameable) { TU.ExtraArgs.push_back("-xobjective-c++-header"); } auto AST = TU.build(); - llvm::StringRef NewName = "dummyNewName"; + llvm::StringRef NewName = Case.NewName; auto Results = rename({T.point(), NewName, AST, testPath(TU.Filename), Case.Index}); bool WantRename = true; From 75d33a3a97c6f6e65ef5139a4a12508716842601 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 6 Oct 2020 13:44:17 +0100 Subject: [PATCH 139/321] [InstCombine] FoldShiftByConstant - consistently use ConstantExpr in logicalshift(trunc(shift(x,c1)),c2) fold. NFCI. 
This still only gets used for scalar types, but now always uses ConstantExpr in preparation for vector support - it was using APInt methods in some places. --- .../Transforms/InstCombine/InstCombineShifts.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 6e12f8011a360..8ddffe34bdb8c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -705,24 +705,16 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, // other xforms later if dead. unsigned SrcSize = SrcTy->getScalarSizeInBits(); unsigned DstSize = TI->getType()->getScalarSizeInBits(); - APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); + Constant *MaskV = + ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcSize, DstSize)); // The mask we constructed says what the trunc would do if occurring // between the shifts. We want to know the effect *after* the second // shift. We know that it is a logical shift by a constant, so adjust the // mask as appropriate. - if (I.getOpcode() == Instruction::Shl) - MaskV <<= Op1C->getZExtValue(); - else { - assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); - MaskV.lshrInPlace(Op1C->getZExtValue()); - } - + MaskV = ConstantExpr::get(I.getOpcode(), MaskV, ShAmt); // shift1 & 0x00FF - Value *And = Builder.CreateAnd(NSh, - ConstantInt::get(I.getContext(), MaskV), - TI->getName()); - + Value *And = Builder.CreateAnd(NSh, MaskV, TI->getName()); // Return the value truncated to the interesting size. return new TruncInst(And, I.getType()); } From 17b9a91ec274a527e734321701d2791368f146c9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 6 Oct 2020 13:52:25 +0100 Subject: [PATCH 140/321] [InstCombine] canRewriteGEPAsOffset - don't dereference a dyn_cast<>. NFCI. We know V is an IntToPtrInst or PtrToIntInst, so we know it's a CastInst - use cast<> directly. Prevents a clang static analyzer warning that we could dereference a null pointer. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7a7de4db80330..4f2d350d82ccb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -552,7 +552,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base, return false; if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) { - auto *CI = dyn_cast<CastInst>(V); + auto *CI = cast<CastInst>(V); if (!CI->isNoopCast(DL)) return false; From 37c74dfe72ecf4e7def22702c5a944682a7865df Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Tue, 6 Oct 2020 15:28:19 +0200 Subject: [PATCH 141/321] Revert "[c++17] Implement P0145R3 during constant evaluation." This reverts commit ded79be63555f4e5bfdb0db27ef22b71fe568474. It causes a crash (I sent the crash reproducer directly to the author).
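For context on what is being reverted: P0145R3 sequences the right operand of an assignment before the left, and the reverted change made the constant evaluator follow that order. A minimal C++17 sketch of the assignment rule follows; it is illustrative only, not part of this patch, and all names in it are invented:

// Illustrative C++17 check (not from the patch): under P0145R3, in
// "lhs = rhs" the RHS is evaluated first, and a conforming constant
// evaluator must observe that order. Build with -std=c++17.
constexpr bool rhs_sequenced_first() {
  int order = 0, lhs_at = 0, rhs_at = 0;
  int x = 0;
  auto rhs = [&]() -> int { rhs_at = ++order; return 42; };   // tags when the RHS runs
  auto lhs = [&]() -> int & { lhs_at = ++order; return x; };  // tags when the LHS runs
  lhs() = rhs();  // C++17 [expr.ass]: rhs() is sequenced before lhs()
  return rhs_at == 1 && lhs_at == 2 && x == 42;
}
static_assert(rhs_sequenced_first(), "P0145R3: RHS of '=' is evaluated first");

With this revert, clang's constant evaluator goes back to visiting the assignment's LHS before its RHS (see the restored order in VisitBinAssign below), so a check like the one above may fail during constant evaluation until the change is relanded.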
--- clang/lib/AST/ExprConstant.cpp | 95 ++++++--------- .../SemaCXX/constant-expression-cxx1z.cpp | 109 ------------------ clang/www/cxx_status.html | 1 - 3 files changed, 36 insertions(+), 169 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 49ad01f275455..4460e3a17e6da 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1856,12 +1856,8 @@ void CallStackFrame::describe(raw_ostream &Out) { Out << ", "; const ParmVarDecl *Param = *I; - if (Arguments) { - const APValue &Arg = Arguments[ArgIndex]; - Arg.printPretty(Out, Info.Ctx, Param->getType()); - } else { - Out << "<...>"; - } + const APValue &Arg = Arguments[ArgIndex]; + Arg.printPretty(Out, Info.Ctx, Param->getType()); if (ArgIndex == 0 && IsMemberCall) Out << "->" << *Callee << '('; @@ -5796,8 +5792,6 @@ typedef SmallVector ArgVector; /// EvaluateArgs - Evaluate the arguments to a function call. static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, EvalInfo &Info, const FunctionDecl *Callee) { - ArgValues.resize(Args.size()); - bool Success = true; llvm::SmallBitVector ForbiddenNullArgs; if (Callee->hasAttr()) { @@ -5815,6 +5809,8 @@ static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, } } } + // FIXME: This is the wrong evaluation order for an assignment operator + // called via operator syntax. for (unsigned Idx = 0; Idx < Args.size(); Idx++) { if (!Evaluate(ArgValues[Idx], Info, Args[Idx])) { // If we're checking for a potential constant expression, evaluate all @@ -5838,13 +5834,17 @@ static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, /// Evaluate a function call. static bool HandleFunctionCall(SourceLocation CallLoc, const FunctionDecl *Callee, const LValue *This, - ArrayRef Args, APValue *ArgValues, - const Stmt *Body, EvalInfo &Info, - APValue &Result, const LValue *ResultSlot) { + ArrayRef Args, const Stmt *Body, + EvalInfo &Info, APValue &Result, + const LValue *ResultSlot) { + ArgVector ArgValues(Args.size()); + if (!EvaluateArgs(Args, ArgValues, Info, Callee)) + return false; + if (!Info.CheckCallLimit(CallLoc)) return false; - CallStackFrame Frame(Info, CallLoc, Callee, This, ArgValues); + CallStackFrame Frame(Info, CallLoc, Callee, This, ArgValues.data()); // For a trivial copy or move assignment, perform an APValue copy. This is // essential for unions, where the operations performed by the assignment @@ -7293,8 +7293,6 @@ class ExprEvaluatorBase auto Args = llvm::makeArrayRef(E->getArgs(), E->getNumArgs()); bool HasQualifier = false; - ArgVector ArgValues; - // Extract function decl and 'this' pointer from the callee. if (CalleeType->isSpecificBuiltinType(BuiltinType::BoundMember)) { const CXXMethodDecl *Member = nullptr; @@ -7343,22 +7341,6 @@ class ExprEvaluatorBase return Error(E); } - // For an (overloaded) assignment expression, evaluate the RHS before the - // LHS. - auto *OCE = dyn_cast(E); - if (OCE && OCE->isAssignmentOp()) { - assert(Args.size() == 2 && "wrong number of arguments in assignment"); - if (isa(FD)) { - // Args[0] is the object argument. - if (!EvaluateArgs({Args[1]}, ArgValues, Info, FD)) - return false; - } else { - if (!EvaluateArgs({Args[1], Args[0]}, ArgValues, Info, FD)) - return false; - std::swap(ArgValues[0], ArgValues[1]); - } - } - // Overloaded operator calls to member functions are represented as normal // calls with '*this' as the first argument. 
const CXXMethodDecl *MD = dyn_cast(FD); @@ -7421,11 +7403,6 @@ class ExprEvaluatorBase } else return Error(E); - // Evaluate the arguments now if we've not already done so. - if (ArgValues.empty() && !Args.empty() && - !EvaluateArgs(Args, ArgValues, Info, FD)) - return false; - SmallVector CovariantAdjustmentPath; if (This) { auto *NamedMember = dyn_cast(FD); @@ -7447,7 +7424,6 @@ class ExprEvaluatorBase // Destructor calls are different enough that they have their own codepath. if (auto *DD = dyn_cast(FD)) { assert(This && "no 'this' pointer for destructor call"); - assert(ArgValues.empty() && "unexpected destructor arguments"); return HandleDestruction(Info, E, *This, Info.Ctx.getRecordType(DD->getParent())); } @@ -7456,8 +7432,8 @@ class ExprEvaluatorBase Stmt *Body = FD->getBody(Definition); if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body) || - !HandleFunctionCall(E->getExprLoc(), Definition, This, Args, - ArgValues.data(), Body, Info, Result, ResultSlot)) + !HandleFunctionCall(E->getExprLoc(), Definition, This, Args, Body, Info, + Result, ResultSlot)) return false; if (!CovariantAdjustmentPath.empty() && @@ -8095,20 +8071,17 @@ bool LValueExprEvaluator::VisitArraySubscriptExpr(const ArraySubscriptExpr *E) { if (E->getBase()->getType()->isVectorType()) return Error(E); - APSInt Index; bool Success = true; - - // C++17's rules require us to evaluate the LHS first, regardless of which - // side is the base. - for (const Expr *SubExpr : {E->getLHS(), E->getRHS()}) { - if (SubExpr == E->getBase() ? !evaluatePointer(SubExpr, Result) - : !EvaluateInteger(SubExpr, Index, Info)) { - if (!Info.noteFailure()) - return false; - Success = false; - } + if (!evaluatePointer(E->getBase(), Result)) { + if (!Info.noteFailure()) + return false; + Success = false; } + APSInt Index; + if (!EvaluateInteger(E->getIdx(), Index, Info)) + return false; + return Success && HandleLValueArrayAdjustment(Info, E, Result, E->getType(), Index); } @@ -8152,10 +8125,7 @@ bool LValueExprEvaluator::VisitCompoundAssignOperator( if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(CAO); - // C++17 onwards require that we evaluate the RHS first. APValue RHS; - if (!Evaluate(RHS, this->Info, CAO->getRHS())) - return false; // The overall lvalue result is the result of evaluating the LHS. if (!this->Visit(CAO->getLHS())) { @@ -8164,6 +8134,9 @@ bool LValueExprEvaluator::VisitCompoundAssignOperator( return false; } + if (!Evaluate(RHS, this->Info, CAO->getRHS())) + return false; + return handleCompoundAssignment( this->Info, CAO, Result, CAO->getLHS()->getType(), CAO->getComputationLHSType(), @@ -8174,10 +8147,7 @@ bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(E); - // C++17 onwards require that we evaluate the RHS first. APValue NewVal; - if (!Evaluate(NewVal, this->Info, E->getRHS())) - return false; if (!this->Visit(E->getLHS())) { if (Info.noteFailure()) @@ -8185,6 +8155,9 @@ bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) { return false; } + if (!Evaluate(NewVal, this->Info, E->getRHS())) + return false; + if (Info.getLangOpts().CPlusPlus20 && !HandleUnionActiveMemberChange(Info, E->getLHS(), Result)) return false; @@ -15297,8 +15270,7 @@ bool Expr::isPotentialConstantExpr(const FunctionDecl *FD, } else { SourceLocation Loc = FD->getLocation(); HandleFunctionCall(Loc, FD, (MD && MD->isInstance()) ? 
&This : nullptr, - Args, /*ArgValues*/ nullptr, FD->getBody(), Info, - Scratch, nullptr); + Args, FD->getBody(), Info, Scratch, nullptr); } return Diags.empty(); @@ -15320,8 +15292,13 @@ bool Expr::isPotentialConstantExprUnevaluated(Expr *E, Info.CheckingPotentialConstantExpression = true; // Fabricate a call stack frame to give the arguments a plausible cover story. - CallStackFrame Frame(Info, SourceLocation(), FD, /*This*/ nullptr, - /*ArgValues*/ nullptr); + ArrayRef Args; + ArgVector ArgValues(0); + bool Success = EvaluateArgs(Args, ArgValues, Info, FD); + (void)Success; + assert(Success && + "Failed to set up arguments for potential constant evaluation"); + CallStackFrame Frame(Info, SourceLocation(), FD, nullptr, ArgValues.data()); APValue ResultScratch; Evaluate(ResultScratch, Info, E); diff --git a/clang/test/SemaCXX/constant-expression-cxx1z.cpp b/clang/test/SemaCXX/constant-expression-cxx1z.cpp index 7770e92c63310..2b366adf2e914 100644 --- a/clang/test/SemaCXX/constant-expression-cxx1z.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx1z.cpp @@ -59,112 +59,3 @@ void test() { else if constexpr (v) {} } } - -// Check that assignment operators evaluate their operands right-to-left. -namespace EvalOrder { - template struct lvalue { - T t; - constexpr T &get() { return t; } - }; - - struct UserDefined { - int n = 0; - constexpr UserDefined &operator=(const UserDefined&) { return *this; } - constexpr UserDefined &operator+=(const UserDefined&) { return *this; } - constexpr void operator<<(const UserDefined&) const {} - constexpr void operator>>(const UserDefined&) const {} - constexpr void operator+(const UserDefined&) const {} - constexpr void operator[](int) const {} - }; - constexpr UserDefined ud; - - struct NonMember {}; - constexpr void operator+=(NonMember, NonMember) {} - constexpr void operator<<(NonMember, NonMember) {} - constexpr void operator>>(NonMember, NonMember) {} - constexpr void operator+(NonMember, NonMember) {} - constexpr NonMember nm; - - constexpr void f(...) {} - - // Helper to ensure that 'a' is evaluated before 'b'. - struct seq_checker { - bool done_a = false; - bool done_b = false; - - template constexpr T &&a(T &&v) { - done_a = true; - return (T &&)v; - } - template constexpr T &&b(T &&v) { - if (!done_a) - throw "wrong"; - done_b = true; - return (T &&)v; - } - - constexpr bool ok() { return done_a && done_b; } - }; - - // SEQ(expr), where part of the expression is tagged A(...) and part is - // tagged B(...), checks that A is evaluated before B. - #define A sc.a - #define B sc.b - #define SEQ(...) static_assert([](seq_checker sc) { void(__VA_ARGS__); return sc.ok(); }({})) - - // Longstanding sequencing rules. - SEQ((A(1), B(2))); - SEQ((A(true) ? B(2) : throw "huh?")); - SEQ((A(false) ? throw "huh?" : B(2))); - SEQ(A(true) && B(true)); - SEQ(A(false) || B(true)); - - // From P0145R3: - - // Rules 1 and 2 have no effect ('b' is not an expression). 
- - // Rule 3: a->*b - SEQ(A(ud).*B(&UserDefined::n)); - SEQ(A(&ud)->*B(&UserDefined::n)); - - // Rule 4: a(b1, b2, b3) - SEQ(A(f)(B(1), B(2), B(3))); - - // Rule 5: b = a, b @= a - SEQ(B(lvalue().get()) = A(0)); - SEQ(B(lvalue().get()) = A(ud)); - SEQ(B(lvalue().get()) += A(0)); - SEQ(B(lvalue().get()) += A(ud)); - SEQ(B(lvalue().get()) += A(nm)); - - // Rule 6: a[b] - constexpr int arr[3] = {}; - SEQ(A(arr)[B(0)]); - SEQ(A(+arr)[B(0)]); - SEQ(A(0)[B(arr)]); - SEQ(A(0)[B(+arr)]); - SEQ(A(ud)[B(0)]); - - // Rule 7: a << b - SEQ(A(1) << B(2)); - SEQ(A(ud) << B(ud)); - SEQ(A(nm) << B(nm)); - - // Rule 8: a >> b - SEQ(A(1) >> B(2)); - SEQ(A(ud) >> B(ud)); - SEQ(A(nm) >> B(nm)); - - // No particular order of evaluation is specified in other cases, but we in - // practice evaluate left-to-right. - // FIXME: Technically we're expected to check for undefined behavior due to - // unsequenced read and modification and treat it as non-constant due to UB. - SEQ(A(1) + B(2)); - SEQ(A(ud) + B(ud)); - SEQ(A(nm) + B(nm)); - SEQ(f(A(1), B(2))); - - #undef SEQ - #undef A - #undef B -} diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 9c39e396edd48..3c546eb409dee 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -807,7 +807,6 @@

[cxx_status.html, "C++17 implementation status" table — HTML markup lost in extraction; the recoverable part of this hunk is the removed footnote:]
 operator&&, operator||, and operator, functions using expression syntax are no longer guaranteed to be destroyed in reverse construction order in that ABI.
-This is not fully supported during constant expression evaluation until Clang 12.

(10): Despite being the resolution to a Defect Report, this feature is disabled by default in all language versions, and can be enabled From b3876ef49093b17555ae058eb87bb9c70a525c49 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Tue, 6 Oct 2020 16:02:17 +0200 Subject: [PATCH 142/321] Silence -Wunused-variable in NDEBUG mode --- llvm/lib/CodeGen/StackMaps.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index bdcadab0df08e..a5bad76ca202c 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -409,6 +409,7 @@ void StackMaps::parseStatepointOpers(const MachineInstr &MI, SmallVector, 8> GCPairs; unsigned NumGCPairs = SO.getGCPointerMap(GCPairs); + (void)NumGCPairs; LLVM_DEBUG(dbgs() << "NumGCPairs = " << NumGCPairs << "\n"); auto MOB = MI.operands_begin(); From 66e4f07198761bbb4dcd55235024c1081ed15c75 Mon Sep 17 00:00:00 2001 From: Shivanshu Goyal Date: Tue, 6 Oct 2020 16:12:48 +0200 Subject: [PATCH 143/321] Add ability to turn off -fpch-instantiate-templates in clang-cl A lot of our code building with clang-cl.exe using Clang 11 was failing with the following 2 type of errors: 1. explicit specialization of 'foo' after instantiation 2. no matching function for call to 'bar' Note that we also use -fdelayed-template-parsing in our builds. I tried pretty hard to get a small repro for these failures, but couldn't. So there is some subtle edge case in the -fpch-instantiate-templates feature introduced by this change: https://reviews.llvm.org/D69585 When I tried turning this off using -fno-pch-instantiate-templates, builds would silently fail with the same error without any indication that -fno-pch-instantiate-templates was being ignored by the compiler. Then I realized this "no" option wasn't actually working when I ran Clang under a debugger. 
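To make the fix observable, the driver test added below checks the cc1 line in both directions; the same check can be done by hand with -### (foo.c here stands in for the test's %s input):

    clang-cl -### /Yc /Fpfoo.pch /Fofoo.obj -fno-pch-instantiate-templates -- foo.c

Before this patch the printed cc1 invocation still contained "-fpch-instantiate-templates" under /Yc; with it, the flag is only emitted when not explicitly disabled.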
Differential revision: https://reviews.llvm.org/D88680 --- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/Driver/ToolChains/Clang.cpp | 6 +++++- clang/test/Driver/pch-instantiate-templates.c | 13 +++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 clang/test/Driver/pch-instantiate-templates.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e65a68c0deaae..87e7db27a8276 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1479,11 +1479,11 @@ def fno_pch_validate_input_files_content: Group, Flags<[DriverOption]>; def fpch_instantiate_templates: Flag <["-"], "fpch-instantiate-templates">, - Group, Flags<[CC1Option]>, + Group, Flags<[CC1Option, CoreOption]>, HelpText<"Instantiate templates already while building a PCH">; def fno_pch_instantiate_templates: Flag <["-"], "fno-pch-instantiate-templates">, - Group, Flags<[CC1Option]>; + Group, Flags<[CC1Option, CoreOption]>; defm pch_codegen: OptInFFlag<"pch-codegen", "Generate ", "Do not generate ", "code for uses of this PCH that assumes an explicit object file will be built for the PCH">; defm pch_debuginfo: OptInFFlag<"pch-debuginfo", "Generate ", "Do not generate ", diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f6eeb53964a7d..630b39d1e769e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1212,7 +1212,11 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, if (YcArg && JA.getKind() >= Action::PrecompileJobClass && JA.getKind() <= Action::AssembleJobClass) { CmdArgs.push_back(Args.MakeArgString("-building-pch-with-obj")); - CmdArgs.push_back(Args.MakeArgString("-fpch-instantiate-templates")); + // -fpch-instantiate-templates is the default when creating + // precomp using /Yc + if (Args.hasFlag(options::OPT_fpch_instantiate_templates, + options::OPT_fno_pch_instantiate_templates, true)) + CmdArgs.push_back(Args.MakeArgString("-fpch-instantiate-templates")); } if (YcArg || YuArg) { StringRef ThroughHeader = YcArg ? YcArg->getValue() : YuArg->getValue(); diff --git a/clang/test/Driver/pch-instantiate-templates.c b/clang/test/Driver/pch-instantiate-templates.c new file mode 100644 index 0000000000000..b0f7f34739938 --- /dev/null +++ b/clang/test/Driver/pch-instantiate-templates.c @@ -0,0 +1,13 @@ +// CL driver test cases +// RUN: %clang_cl -### /Yc /Fpfoo.pch /Fofoo.obj -- %s 2>&1 | FileCheck --check-prefix=CLANG_CL_YC %s +// RUN: %clang_cl -### /Yc /Fpfoo.pch /Fofoo.obj -fno-pch-instantiate-templates -- %s 2>&1 | FileCheck --check-prefix=CLANG_CL_YC_DISABLE %s + +// CLANG_CL_YC: "-fpch-instantiate-templates" +// CLANG_CL_YC_DISABLE-NOT: "-fpch-instantiate-templates" + +// GCC driver test cases +// RUN: %clang -### -x c-header %s -o %t/foo.pch 2>&1 | FileCheck -check-prefix=GCC_DEFAULT %s +// RUN: %clang -### -x c-header %s -o %t/foo.pch -fpch-instantiate-templates 2>&1 | FileCheck -check-prefix=GCC_DEFAULT_ENABLE %s + +// GCC_DEFAULT-NOT: "-fpch-instantiate-templates" +// GCC_DEFAULT_ENABLE: "-fpch-instantiate-templates" From 957094e31b058f1b0a4bd3c76912f7d8b5b294b7 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 7 Sep 2020 17:55:05 -0500 Subject: [PATCH 144/321] [Attributor][NFC] Ignore benign uses in AAMemoryBehaviorFloating In AAMemoryBehaviorFloating we used to track benign uses in a SetVector. 
With this change we look through benign uses eagerly to reduce the number of elements (=Uses) we look at during an update. The test does actually not fail prior to this commit but I already wrote it so I kept it. --- .../Transforms/IPO/AttributorAttributes.cpp | 41 +++++++++++++---- llvm/test/Transforms/Attributor/readattrs.ll | 46 +++++++++++++++++++ 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 11b91ddd1a919..a3d3381712a8f 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -5861,9 +5861,7 @@ struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { AAMemoryBehaviorImpl::initialize(A); - // Initialize the use vector with all direct uses of the associated value. - for (const Use &U : getAssociatedValue().uses()) - Uses.insert(&U); + addUsesOf(A, getAssociatedValue()); } /// See AbstractAttribute::updateImpl(...). @@ -5889,8 +5887,14 @@ struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl { void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI); protected: + /// Add the uses of \p V to the `Uses` set we look at during the update step. + void addUsesOf(Attributor &A, const Value &V); + /// Container for (transitive) uses of the associated argument. - SetVector Uses; + SmallVector Uses; + + /// Set to remember the uses we already traversed. + SmallPtrSet Visited; }; /// Memory behavior attribute for function argument. @@ -5915,9 +5919,7 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating { if (!Arg || !A.isFunctionIPOAmendable(*(Arg->getParent()))) { indicatePessimisticFixpoint(); } else { - // Initialize the use vector with all direct uses of the associated value. - for (const Use &U : Arg->uses()) - Uses.insert(&U); + addUsesOf(A, *Arg); } } @@ -6169,8 +6171,7 @@ ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) { // Check if the users of UserI should also be visited. if (followUsersOfUseIn(A, U, UserI)) - for (const Use &UserIUse : UserI->uses()) - Uses.insert(&UserIUse); + addUsesOf(A, *UserI); // If UserI might touch memory we analyze the use in detail. 
if (UserI->mayReadOrWriteMemory()) @@ -6181,6 +6182,28 @@ ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) { : ChangeStatus::UNCHANGED; } +void AAMemoryBehaviorFloating::addUsesOf(Attributor &A, const Value &V) { + SmallVector WL; + for (const Use &U : V.uses()) + WL.push_back(&U); + + while (!WL.empty()) { + const Use *U = WL.pop_back_val(); + if (!Visited.insert(U).second) + continue; + + const Instruction *UserI = cast(U->getUser()); + if (UserI->mayReadOrWriteMemory()) { + Uses.push_back(U); + continue; + } + if (!followUsersOfUseIn(A, U, UserI)) + continue; + for (const Use &UU : UserI->uses()) + WL.push_back(&UU); + } +} + bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, const Instruction *UserI) { // The loaded value is unrelated to the pointer argument, no need to diff --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll index 2e87ae196183d..4dca552acbcdb 100644 --- a/llvm/test/Transforms/Attributor/readattrs.ll +++ b/llvm/test/Transforms/Attributor/readattrs.ll @@ -403,3 +403,49 @@ define void @ptr_uses(i8* %ptr) { call void @val_use(i8 %call_val) ret void } + +define void @ptr_use_chain(i8* %ptr) { +; CHECK-LABEL: define {{[^@]+}}@ptr_use_chain +; CHECK-SAME: (i8* [[PTR:%.*]]) +; CHECK-NEXT: [[BC0:%.*]] = bitcast i8* [[PTR]] to i32* +; CHECK-NEXT: [[BC1:%.*]] = bitcast i32* [[BC0]] to i8* +; CHECK-NEXT: [[BC2:%.*]] = bitcast i8* [[BC1]] to i32* +; CHECK-NEXT: [[BC3:%.*]] = bitcast i32* [[BC2]] to i8* +; CHECK-NEXT: [[BC4:%.*]] = bitcast i8* [[BC3]] to i32* +; CHECK-NEXT: [[BC5:%.*]] = bitcast i32* [[BC4]] to i8* +; CHECK-NEXT: [[BC6:%.*]] = bitcast i8* [[BC5]] to i32* +; CHECK-NEXT: [[BC7:%.*]] = bitcast i32* [[BC6]] to i8* +; CHECK-NEXT: [[BC8:%.*]] = bitcast i8* [[BC7]] to i32* +; CHECK-NEXT: [[BC9:%.*]] = bitcast i32* [[BC8]] to i8* +; CHECK-NEXT: [[ABC2:%.*]] = bitcast i8* [[BC9]] to i32* +; CHECK-NEXT: [[ABC3:%.*]] = bitcast i32* [[ABC2]] to i8* +; CHECK-NEXT: [[ABC4:%.*]] = bitcast i8* [[ABC3]] to i32* +; CHECK-NEXT: [[ABC5:%.*]] = bitcast i32* [[ABC4]] to i8* +; CHECK-NEXT: [[ABC6:%.*]] = bitcast i8* [[ABC5]] to i32* +; CHECK-NEXT: [[ABC7:%.*]] = bitcast i32* [[ABC6]] to i8* +; CHECK-NEXT: [[ABC8:%.*]] = bitcast i8* [[ABC7]] to i32* +; CHECK-NEXT: [[ABC9:%.*]] = bitcast i32* [[ABC8]] to i8* +; CHECK-NEXT: call void @escape_i8(i8* [[ABC9]]) +; CHECK-NEXT: ret void +; + %bc0 = bitcast i8* %ptr to i32* + %bc1 = bitcast i32* %bc0 to i8* + %bc2 = bitcast i8* %bc1 to i32* + %bc3 = bitcast i32* %bc2 to i8* + %bc4 = bitcast i8* %bc3 to i32* + %bc5 = bitcast i32* %bc4 to i8* + %bc6 = bitcast i8* %bc5 to i32* + %bc7 = bitcast i32* %bc6 to i8* + %bc8 = bitcast i8* %bc7 to i32* + %bc9 = bitcast i32* %bc8 to i8* + %abc2 = bitcast i8* %bc9 to i32* + %abc3 = bitcast i32* %abc2 to i8* + %abc4 = bitcast i8* %abc3 to i32* + %abc5 = bitcast i32* %abc4 to i8* + %abc6 = bitcast i8* %abc5 to i32* + %abc7 = bitcast i32* %abc6 to i8* + %abc8 = bitcast i8* %abc7 to i32* + %abc9 = bitcast i32* %abc8 to i8* + call void @escape_i8(i8* %abc9) + ret void +} From 04f6951397cfbb892b99027bd3c0e4e0382f5458 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 8 Sep 2020 20:18:54 -0500 Subject: [PATCH 145/321] [Attributor][FIX] Dead return values are not `noundef` When we assume a return value is dead we might still visit return instructions via `Attributor::checkForAllReturnedValuesAndReturnInsts(..)`. 
When we do so the "returned value" is potentially simplified to `undef` as it is the assumed "returned value". This is a problem if there was a preexisting `noundef` attribute that will only be removed as we manifest the `undef` return value. We should not use this combination to derive `unreachable` though. Two test cases fixed. --- .../Transforms/IPO/AttributorAttributes.cpp | 22 +++++++++++-------- .../2008-02-01-ReturnAttrs.ll | 4 ++-- .../Attributor/ArgumentPromotion/byval.ll | 14 ++++++------ .../Attributor/ArgumentPromotion/inalloca.ll | 4 ++-- .../live_called_from_dead.ll | 2 +- .../live_called_from_dead_2.ll | 2 +- llvm/test/Transforms/Attributor/align.ll | 4 ++-- llvm/test/Transforms/Attributor/depgraph.ll | 4 ++-- llvm/test/Transforms/Attributor/noalias.ll | 4 ++-- llvm/test/Transforms/Attributor/readattrs.ll | 2 +- 10 files changed, 33 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index a3d3381712a8f..6e31625f9daae 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -2060,6 +2060,10 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { // Check if a return instruction always cause UB or not // Note: It is guaranteed that the returned position of the anchor // scope has noundef attribute when this is called. + // We also ensure the return position is not "assumed dead" + // because the returned value was then potentially simplified to + // `undef` in AAReturnedValues without removing the `noundef` + // attribute yet. // When the returned position has noundef attriubte, UB occur in the // following cases. @@ -2067,9 +2071,6 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { // (2) The value is known to be a null pointer and the returned // position has nonnull attribute (because the returned value is // poison). - // Note: This callback is not called for a dead returned value because - // such values are ignored in - // checkForAllReturnedValuesAndReturnedInsts. bool FoundUB = false; if (isa(V)) { FoundUB = true; @@ -2101,12 +2102,15 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { // If the returned position of the anchor scope has noundef attriubte, check // all returned instructions. 
if (!getAnchorScope()->getReturnType()->isVoidTy()) { - auto &RetPosNoUndefAA = - A.getAAFor(*this, IRPosition::returned(*getAnchorScope()), - /* TrackDependence */ false); - if (RetPosNoUndefAA.isKnownNoUndef()) - A.checkForAllReturnedValuesAndReturnInsts(InspectReturnInstForUB, - *this); + const IRPosition &ReturnIRP = IRPosition::returned(*getAnchorScope()); + if (!A.isAssumedDead(ReturnIRP, this, nullptr)) { + auto &RetPosNoUndefAA = + A.getAAFor(*this, ReturnIRP, + /* TrackDependence */ false); + if (RetPosNoUndefAA.isKnownNoUndef()) + A.checkForAllReturnedValuesAndReturnInsts(InspectReturnInstForUB, + *this); + } } if (NoUBPrevSize != AssumedNoUBInsts.size() || diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll index eed3e40d9ade8..8603c2c769fc4 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll index 0f0fce92e3e35..8d6603dffaa33 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -152,15 +152,15 @@ define i32 @main() nounwind { ; IS__TUNIT_NPM-NEXT: store i32 1, i32* [[TMP1]], align 8 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__TUNIT_NPM-NEXT: store i64 2, i64* [[TMP4]], align 4 -; IS__TUNIT_NPM-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = 
load i32, i32* [[S_CAST1]], align 8 -; IS__TUNIT_NPM-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_12]], align 8 -; IS__TUNIT_NPM-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) [[ATTR0]] ; IS__TUNIT_NPM-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST]], align 32 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8 ; IS__TUNIT_NPM-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_1]], align 32 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8 +; IS__TUNIT_NPM-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) [[ATTR0]] +; IS__TUNIT_NPM-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 32 +; IS__TUNIT_NPM-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 32 ; IS__TUNIT_NPM-NEXT: [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]]) [[ATTR0]] ; IS__TUNIT_NPM-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; IS__TUNIT_NPM-NEXT: ret i32 [[A]] diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll index f380e34409619..a071b5294991e 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git 
a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll index 8c7a16abd001e..8426620397a56 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -56,7 +56,7 @@ define internal i32 @caller(i32* %B) { ; IS__CGSCC_NPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[A]], align 4 ; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) [[ATTR2:#.*]] -; IS__CGSCC_NPM-NEXT: unreachable +; IS__CGSCC_NPM-NEXT: ret i32 undef ; %A = alloca i32 store i32 1, i32* %A diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll index 4c0ab821eb2df..113300e98684a 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -64,7 +64,7 @@ define internal i32 @caller(i32* %B) { ; IS__CGSCC_NPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[A]], align 4 ; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) [[ATTR2:#.*]] -; IS__CGSCC_NPM-NEXT: unreachable +; IS__CGSCC_NPM-NEXT: ret i32 undef ; %A = alloca i32 store i32 1, i32* %A diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll index 9c2c04e157ba8..e859194776955 100644 --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -176,7 +176,7 @@ define internal i8* @f1(i8* readnone %0) local_unnamed_addr #0 { ; IS__CGSCC_NPM: 1: ; IS__CGSCC_NPM-NEXT: unreachable ; IS__CGSCC_NPM: 2: -; IS__CGSCC_NPM-NEXT: unreachable +; IS__CGSCC_NPM-NEXT: ret i8* undef ; %2 = icmp eq i8* %0, null br i1 %2, label %3, label %5 @@ -261,7 +261,7 @@ define internal i8* @f1b(i8* readnone %0) local_unnamed_addr #0 { ; IS__CGSCC_NPM: 1: ; IS__CGSCC_NPM-NEXT: unreachable ; IS__CGSCC_NPM: 2: -; IS__CGSCC_NPM-NEXT: unreachable +; IS__CGSCC_NPM-NEXT: ret i8* undef ; %2 = icmp eq i8* %0, null br i1 %2, label %3, label %5 diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll index d7dc9d42f49b2..c0de61597d6b8 100644 --- a/llvm/test/Transforms/Attributor/depgraph.ll +++ b/llvm/test/Transforms/Attributor/depgraph.ll @@ -61,6 +61,8 @@ define i32* @checkAndAdvance(i32* align 16 %0) { ; GRAPH-EMPTY: ; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state simplified ; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state assumed-live +; GRAPH-EMPTY: ; GRAPH-NEXT: [AANoUndef] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state may-undef-or-poison ; GRAPH-EMPTY: ; GRAPH-NEXT: [AAReturnedValues] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state returns(#3)[#UC: 1] @@ -126,8 +128,6 @@ define i32* @checkAndAdvance(i32* align 16 %0) { ; GRAPH-EMPTY: ; GRAPH-NEXT: [AAHeapToStack] for CtxI ' %2 = load i32, i32* %0, align 4' at position 
{fn:checkAndAdvance [checkAndAdvance@-1]} with state [H2S] Mallocs: 0 ; GRAPH-EMPTY: -; GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state assumed-live -; GRAPH-EMPTY: ; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state simplified ; GRAPH-EMPTY: ; GRAPH-NEXT: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index 18bb8e9719d52..f60346dd71661 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; TODO: The old pass manager cgscc run is disabled as it causes a crash on windows which is under investigation: http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/25479/steps/test-check-all/logs/FAIL%3A%20LLVM%3A%3Anoalias.ll ; opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll index 4dca552acbcdb..17c819b4df181 100644 --- a/llvm/test/Transforms/Attributor/readattrs.ll +++ b/llvm/test/Transforms/Attributor/readattrs.ll @@ -406,7 +406,7 @@ define void @ptr_uses(i8* %ptr) { define void @ptr_use_chain(i8* %ptr) { ; CHECK-LABEL: define {{[^@]+}}@ptr_use_chain -; CHECK-SAME: (i8* [[PTR:%.*]]) +; CHECK-SAME: (i8* [[PTR:%.*]]) { ; CHECK-NEXT: [[BC0:%.*]] = bitcast i8* [[PTR]] to i32* ; CHECK-NEXT: [[BC1:%.*]] = bitcast i32* [[BC0]] to i8* ; CHECK-NEXT: [[BC2:%.*]] = bitcast i8* [[BC1]] to i32* From 4a7a988442dfa40309b34746218c07ebf758b378 
Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 9 Sep 2020 16:08:20 -0500 Subject: [PATCH 146/321] [Attributor][FIX] Move assertion to make it not trivially fail The idea of this assertion was to check the simplified value before we assign it, not after, which caused this to trivially fail all the time. --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 4 ++-- llvm/test/Transforms/Attributor/value-simplify.ll | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 6e31625f9daae..3f85d968f8e3b 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -4774,10 +4774,10 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { if (Op0IsNull && Op1IsNull) { Value *NewVal = ConstantInt::get( Type::getInt1Ty(Ctx), ICmp->getPredicate() == CmpInst::ICMP_EQ); - SimplifiedAssociatedValue = NewVal; - indicateOptimisticFixpoint(); assert(!SimplifiedAssociatedValue.hasValue() && "Did not expect non-fixed value for constant comparison"); + SimplifiedAssociatedValue = NewVal; + indicateOptimisticFixpoint(); Changed = ChangeStatus::CHANGED; return true; } diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index 174fd799a6e0e..8fc34cd08ed6e 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -788,3 +788,18 @@ define internal i8 @callee(i8 %a) { ret i8 %c } + +define i1 @icmp() { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@icmp +; IS__TUNIT____-SAME: () [[ATTR1]] { +; IS__TUNIT____-NEXT: ret i1 true +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@icmp +; IS__CGSCC____-SAME: () [[ATTR1]] { +; IS__CGSCC____-NEXT: ret i1 true +; + %c = icmp eq i8* null, null + ret i1 %c +} From 61d4b342d15a82d5e841d9e72a52d77eaa5b5097 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 22 Sep 2020 15:18:55 -0700 Subject: [PATCH 147/321] [test][NewPM] Make dead-uses.ll work under NPM This one is weird... globals-aa needs to be already computed at licm, or else a function pass can't run a module analysis and won't have access to globals-aa. But the globals-aa result is impacted by instcombine in a way that affects what the test is expecting. If globals-aa is computed before instcombine, it is cached and globals-aa used in licm won't contain the necessary info provided by instcombine. Another catch is that if we don't invalidate AAManager, it will use the cached AAManager that instcombine requested, which may not contain globals-aa. So we have to invalidate so that licm can recompute an AAManager with the globals-aa created by the require. This is essentially the problem described in https://reviews.llvm.org/D84259. 
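Concretely, the new-PM RUN line has to spell the whole dance out by hand; a sketch of the intended pipeline (angle-bracketed pass parameters reconstructed here, mirroring the RUN line added below):

    opt < dead-uses.ll -aa-pipeline=basic-aa,globals-aa \
      -passes='function(instcombine),require<globals-aa>,function(invalidate<aa>,loop(licm))' -S

require<globals-aa> recomputes globals-aa after instcombine has run, and invalidate<aa> drops the AAManager that instcombine cached so licm builds a fresh one containing that globals-aa result.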
Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D88118 --- llvm/test/Analysis/GlobalsModRef/dead-uses.ll | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/test/Analysis/GlobalsModRef/dead-uses.ll b/llvm/test/Analysis/GlobalsModRef/dead-uses.ll index a96655d48bfd0..3657e880007ca 100644 --- a/llvm/test/Analysis/GlobalsModRef/dead-uses.ll +++ b/llvm/test/Analysis/GlobalsModRef/dead-uses.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -instcombine -globals-aa -licm -S | FileCheck %s +; RUN: opt < %s -instcombine -globals-aa -licm -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa,globals-aa -passes='function(instcombine),require,function(invalidate,loop(licm))' -S | FileCheck %s ; Make sure -globals-aa ignores dead uses of globals. From 8df17b4dc12292cd7adc9c46a5bd75b8db26d49e Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 23 Sep 2020 15:11:40 -0700 Subject: [PATCH 148/321] [test][InstCombine][NewPM] Fix InstCombine tests under NPM Some of these depended on analyses being present that aren't provided automatically in NPM. early_dce_clobbers_callgraph.ll was previously inlining a noinline function? cast-call-combine.ll relied on the legacy always-inline pass being a CGSCC pass and getting rerun. Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D88187 --- llvm/test/Transforms/InstCombine/cast-call-combine.ll | 2 +- .../Transforms/InstCombine/early_dce_clobbers_callgraph.ll | 6 ++---- llvm/test/Transforms/InstCombine/fputs-opt-size.ll | 3 ++- .../Transforms/InstCombine/gep-combine-loop-invariant.ll | 3 ++- llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/cast-call-combine.ll b/llvm/test/Transforms/InstCombine/cast-call-combine.ll index be70a8763ea88..bf70811b6c4bb 100644 --- a/llvm/test/Transforms/InstCombine/cast-call-combine.ll +++ b/llvm/test/Transforms/InstCombine/cast-call-combine.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -always-inline -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -always-inline -S | FileCheck %s define internal void @foo(i16*) alwaysinline { ret void diff --git a/llvm/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll b/llvm/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll index 743477621fa18..01b72f6ab2018 100644 --- a/llvm/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll +++ b/llvm/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll @@ -11,9 +11,7 @@ ; no longer used function 'bar' (due to incorrect reference ; count in the CallGraph). -attributes #0 = { noinline norecurse nounwind readnone } - -define void @foo() #0 { +define void @foo() { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void @@ -23,7 +21,7 @@ entry: ret void } -define internal i32 @bar() #0 { +define internal i32 @bar() { ; CHECK-NOT: bar entry: ret i32 42 diff --git a/llvm/test/Transforms/InstCombine/fputs-opt-size.ll b/llvm/test/Transforms/InstCombine/fputs-opt-size.ll index 54ac96f9f8627..f39d0465c6364 100644 --- a/llvm/test/Transforms/InstCombine/fputs-opt-size.ll +++ b/llvm/test/Transforms/InstCombine/fputs-opt-size.ll @@ -2,7 +2,8 @@ ; because it requires more arguments and thus extra MOVs are required. 
; ; RUN: opt < %s -instcombine -S | FileCheck %s -; RUN: opt < %s -instcombine -pgso -S | FileCheck %s -check-prefix=PGSO +; RUN: opt < %s -instcombine -pgso -S -enable-new-pm=0 | FileCheck %s -check-prefix=PGSO +; RUN: opt < %s -passes='require,function(instcombine)' -pgso -S | FileCheck %s -check-prefix=PGSO ; RUN: opt < %s -instcombine -pgso=false -S | FileCheck %s -check-prefix=NPGSO %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] } diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll index 43887caeecbb9..75888c880b226 100644 --- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes='require,instcombine' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll b/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll index 4a80a885be399..006936f2c24e4 100644 --- a/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll +++ b/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -disable-output -branch-prob -instcombine -block-freq -verify-dom-info -; RUN: opt < %s -postdomtree -analyze | FileCheck --check-prefixes=CHECK-POSTDOM %s +; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck --check-prefixes=CHECK-POSTDOM %s ; RUN: opt < %s -passes='print' 2>&1 | FileCheck --check-prefixes=CHECK-POSTDOM %s ; Demonstrate that Predicate Canonicalization (InstCombine) does not invalidate PostDomTree From 40251fee00840b98d927c2c138e45e812bc3468b Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 5 Oct 2020 15:17:12 -0700 Subject: [PATCH 149/321] [BPF][NewPM] Make BPFTargetMachine properly adjust NPM optimizer pipeline This involves porting BPFAbstractMemberAccess and BPFPreserveDIType to NPM, then adding them BPFTargetMachine::registerPassBuilderCallbacks (the NPM equivalent of adjustPassManager()). 
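The mechanical shape of the port is the same for both passes: keep the logic in a shared implementation and add a thin new-PM wrapper next to the legacy FunctionPass. A minimal sketch using BPFPreserveDIType (names match the patch below; BPFPreserveDITypeImpl is the shared worker):

    // New-PM wrapper: preserve nothing if the IR changed, everything otherwise.
    class BPFPreserveDITypePass : public PassInfoMixin<BPFPreserveDITypePass> {
    public:
      PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
        return BPFPreserveDITypeImpl(F) ? PreservedAnalyses::none()
                                        : PreservedAnalyses::all();
      }
    };

The target then hooks these in via PB.registerPipelineStartEPCallback inside registerPassBuilderCallbacks, the new-PM counterpart of the legacy adjustPassManager hook.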
Reviewed By: yonghong-song, asbirlea Differential Revision: https://reviews.llvm.org/D88855 --- llvm/lib/Target/BPF/BPF.h | 19 +++++- .../Target/BPF/BPFAbstractMemberAccess.cpp | 50 +++++++++++----- llvm/lib/Target/BPF/BPFPreserveDIType.cpp | 58 ++++++++++--------- llvm/lib/Target/BPF/BPFTargetMachine.cpp | 19 +++++- llvm/lib/Target/BPF/BPFTargetMachine.h | 2 + llvm/test/CodeGen/BPF/CORE/store-addr.ll | 3 + 6 files changed, 105 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h index 79466fa3671f2..966a3b0cf26e6 100644 --- a/llvm/lib/Target/BPF/BPF.h +++ b/llvm/lib/Target/BPF/BPF.h @@ -10,6 +10,7 @@ #define LLVM_LIB_TARGET_BPF_BPF_H #include "MCTargetDesc/BPFMCTargetDesc.h" +#include "llvm/IR/PassManager.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -28,13 +29,27 @@ FunctionPass *createBPFMIPreEmitCheckingPass(); void initializeBPFCheckAndAdjustIRPass(PassRegistry&); -void initializeBPFAbstractMemberAccessPass(PassRegistry&); +void initializeBPFAbstractMemberAccessLegacyPassPass(PassRegistry &); void initializeBPFPreserveDITypePass(PassRegistry&); void initializeBPFMISimplifyPatchablePass(PassRegistry&); void initializeBPFMIPeepholePass(PassRegistry&); void initializeBPFMIPeepholeTruncElimPass(PassRegistry&); void initializeBPFMIPreEmitPeepholePass(PassRegistry&); void initializeBPFMIPreEmitCheckingPass(PassRegistry&); -} + +class BPFAbstractMemberAccessPass + : public PassInfoMixin { + BPFTargetMachine *TM; + +public: + BPFAbstractMemberAccessPass(BPFTargetMachine *TM) : TM(TM) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +class BPFPreserveDITypePass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm #endif diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 848fc36538c88..d04b0644ac497 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -83,6 +83,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsBPF.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -113,18 +114,11 @@ Instruction *BPFCoreSharedInfo::insertPassThrough(Module *M, BasicBlock *BB, using namespace llvm; namespace { - -class BPFAbstractMemberAccess final : public FunctionPass { - bool runOnFunction(Function &F) override; - +class BPFAbstractMemberAccess final { public: - static char ID; - TargetMachine *TM; - // Add optional BPFTargetMachine parameter so that BPF backend can add the phase - // with target machine to find out the endianness. The default constructor (without - // parameters) is used by the pass manager for managing purposes. 
- BPFAbstractMemberAccess(BPFTargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) {} + BPFAbstractMemberAccess(BPFTargetMachine *TM) : TM(TM) {} + + bool run(Function &F); struct CallInfo { uint32_t Kind; @@ -143,6 +137,7 @@ class BPFAbstractMemberAccess final : public FunctionPass { BPFPreserveFieldInfoAI = 4, }; + TargetMachine *TM; const DataLayout *DL = nullptr; Module *M = nullptr; @@ -183,17 +178,36 @@ class BPFAbstractMemberAccess final : public FunctionPass { uint64_t getConstant(const Value *IndexValue); bool transformGEPChain(CallInst *Call, CallInfo &CInfo); }; + +class BPFAbstractMemberAccessLegacyPass final : public FunctionPass { + BPFTargetMachine *TM; + + bool runOnFunction(Function &F) override { + return BPFAbstractMemberAccess(TM).run(F); + } + +public: + static char ID; + + // Add optional BPFTargetMachine parameter so that BPF backend can add the + // phase with target machine to find out the endianness. The default + // constructor (without parameters) is used by the pass manager for managing + // purposes. + BPFAbstractMemberAccessLegacyPass(BPFTargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM) {} +}; + } // End anonymous namespace -char BPFAbstractMemberAccess::ID = 0; -INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE, +char BPFAbstractMemberAccessLegacyPass::ID = 0; +INITIALIZE_PASS(BPFAbstractMemberAccessLegacyPass, DEBUG_TYPE, "BPF Abstract Member Access", false, false) FunctionPass *llvm::createBPFAbstractMemberAccess(BPFTargetMachine *TM) { - return new BPFAbstractMemberAccess(TM); + return new BPFAbstractMemberAccessLegacyPass(TM); } -bool BPFAbstractMemberAccess::runOnFunction(Function &F) { +bool BPFAbstractMemberAccess::run(Function &F) { LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n"); M = F.getParent(); @@ -1096,3 +1110,9 @@ bool BPFAbstractMemberAccess::doTransformation(Function &F) { return removePreserveAccessIndexIntrinsic(F) || Transformed; } + +PreservedAnalyses +BPFAbstractMemberAccessPass::run(Function &F, FunctionAnalysisManager &AM) { + return BPFAbstractMemberAccess(TM).run(F) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp index 282f0fbfafc35..d2e969780786f 100644 --- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp +++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -33,41 +34,15 @@ using namespace llvm; namespace { -class BPFPreserveDIType final : public FunctionPass { - bool runOnFunction(Function &F) override; - -public: - static char ID; - BPFPreserveDIType() : FunctionPass(ID) {} - -private: - Module *M = nullptr; - - bool doTransformation(Function &F); -}; -} // End anonymous namespace - -char BPFPreserveDIType::ID = 0; -INITIALIZE_PASS(BPFPreserveDIType, DEBUG_TYPE, "BPF Preserve Debuginfo Type", - false, false) - -FunctionPass *llvm::createBPFPreserveDIType() { return new BPFPreserveDIType(); } - -bool BPFPreserveDIType::runOnFunction(Function &F) { +static bool BPFPreserveDITypeImpl(Function &F) { LLVM_DEBUG(dbgs() << "********** preserve debuginfo type **********\n"); - M = F.getParent(); - if (!M) - return false; + Module *M = F.getParent(); // Bail out if no debug info. 
if (M->debug_compile_units().empty()) return false; - return doTransformation(F); -} - -bool BPFPreserveDIType::doTransformation(Function &F) { std::vector PreserveDITypeCalls; for (auto &BB : F) { @@ -135,3 +110,30 @@ bool BPFPreserveDIType::doTransformation(Function &F) { return true; } + +class BPFPreserveDIType final : public FunctionPass { + bool runOnFunction(Function &F) override; + +public: + static char ID; + BPFPreserveDIType() : FunctionPass(ID) {} +}; +} // End anonymous namespace + +char BPFPreserveDIType::ID = 0; +INITIALIZE_PASS(BPFPreserveDIType, DEBUG_TYPE, "BPF Preserve Debuginfo Type", + false, false) + +FunctionPass *llvm::createBPFPreserveDIType() { + return new BPFPreserveDIType(); +} + +bool BPFPreserveDIType::runOnFunction(Function &F) { + return BPFPreserveDITypeImpl(F); +} + +PreservedAnalyses BPFPreserveDITypePass::run(Function &F, + FunctionAnalysisManager &AM) { + return BPFPreserveDITypeImpl(F) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index 7511dde27b348..e5fc5bac97a8d 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -18,11 +18,14 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" using namespace llvm; @@ -37,7 +40,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() { RegisterTargetMachine Z(getTheBPFTarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); - initializeBPFAbstractMemberAccessPass(PR); + initializeBPFAbstractMemberAccessLegacyPassPass(PR); initializeBPFPreserveDITypePass(PR); initializeBPFCheckAndAdjustIRPass(PR); initializeBPFMIPeepholePass(PR); @@ -114,6 +117,20 @@ void BPFTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { }); } +void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, + bool DebugPassManager) { + PB.registerPipelineStartEPCallback([=](ModulePassManager &MPM) { + FunctionPassManager FPM(DebugPassManager); + FPM.addPass(BPFAbstractMemberAccessPass(this)); + FPM.addPass(BPFPreserveDITypePass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + }); + PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM, + PassBuilder::OptimizationLevel Level) { + FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))); + }); +} + void BPFPassConfig::addIRPasses() { addPass(createBPFCheckAndAdjustIR()); TargetPassConfig::addIRPasses(); diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h index b252d217014e4..5243a15eb7b05 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.h +++ b/llvm/lib/Target/BPF/BPFTargetMachine.h @@ -39,6 +39,8 @@ class BPFTargetMachine : public LLVMTargetMachine { } void adjustPassManager(PassManagerBuilder &) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool DebugPassManager) override; }; } diff --git a/llvm/test/CodeGen/BPF/CORE/store-addr.ll b/llvm/test/CodeGen/BPF/CORE/store-addr.ll index 609460f7076e1..47d8fd2805901 100644 --- 
a/llvm/test/CodeGen/BPF/CORE/store-addr.ll +++ b/llvm/test/CodeGen/BPF/CORE/store-addr.ll @@ -1,6 +1,9 @@ ; RUN: opt -O2 %s | llvm-dis > %t1 ; RUN: llc -filetype=asm -o - %t1 | FileCheck %s ; RUN: llc -mattr=+alu32 -filetype=asm -o - %t1 | FileCheck %s +; RUN: opt -passes='default' %s | llvm-dis > %t1 +; RUN: llc -filetype=asm -o - %t1 | FileCheck %s +; RUN: llc -mattr=+alu32 -filetype=asm -o - %t1 | FileCheck %s ; Source code: ; struct t { ; int a; From aa2b593f1495a972a4a592952760ec9d5f7c01f1 Mon Sep 17 00:00:00 2001 From: Aaron En Ye Shi Date: Thu, 1 Oct 2020 21:43:52 +0000 Subject: [PATCH 150/321] [HIP] Restructure hip headers to add cmath Separate __clang_hip_math.h header into __clang_hip_cmath.h and __clang_hip_math.h. Improve the math function definition, and add missing definitions or declarations. Add missing overloads. Reviewed By: tra, JonChesterfield Differential Review: https://reviews.llvm.org/D88837 --- clang/lib/Headers/CMakeLists.txt | 1 + clang/lib/Headers/__clang_hip_cmath.h | 521 ++++++++ .../Headers/__clang_hip_libdevice_declares.h | 16 +- clang/lib/Headers/__clang_hip_math.h | 1094 ++++++++--------- .../lib/Headers/__clang_hip_runtime_wrapper.h | 1 + 5 files changed, 1073 insertions(+), 560 deletions(-) create mode 100644 clang/lib/Headers/__clang_hip_cmath.h diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 95047e7069e7b..533ff4506ffef 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -47,6 +47,7 @@ set(files __clang_cuda_math_forward_declares.h __clang_cuda_runtime_wrapper.h __clang_hip_libdevice_declares.h + __clang_hip_cmath.h __clang_hip_math.h __clang_hip_runtime_wrapper.h cetintrin.h diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h new file mode 100644 index 0000000000000..067c7e6c9d1ba --- /dev/null +++ b/clang/lib/Headers/__clang_hip_cmath.h @@ -0,0 +1,521 @@ +/*===---- __clang_hip_cmath.h - HIP cmath decls -----------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_HIP_CMATH_H__ +#define __CLANG_HIP_CMATH_H__ + +#if !defined(__HIP__) +#error "This file is for HIP and OpenMP AMDGCN device compilation only." +#endif + +#if defined(__cplusplus) +#include +#endif +#include +#include + +#pragma push_macro("__DEVICE__") +#define __DEVICE__ static __device__ inline __attribute__((always_inline)) + +// Start with functions that cannot be defined by DEF macros below. 
+#if defined(__cplusplus) +__DEVICE__ double abs(double __x) { return ::fabs(__x); } +__DEVICE__ float abs(float __x) { return ::fabsf(__x); } +__DEVICE__ long long abs(long long __n) { return ::llabs(__n); } +__DEVICE__ long abs(long __n) { return ::labs(__n); } +__DEVICE__ float fma(float __x, float __y, float __z) { + return ::fmaf(__x, __y, __z); +} +__DEVICE__ int fpclassify(float __x) { + return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, + FP_ZERO, __x); +} +__DEVICE__ int fpclassify(double __x) { + return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, + FP_ZERO, __x); +} +__DEVICE__ float frexp(float __arg, int *__exp) { + return ::frexpf(__arg, __exp); +} +__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); } +__DEVICE__ bool isgreater(float __x, float __y) { + return __builtin_isgreater(__x, __y); +} +__DEVICE__ bool isgreater(double __x, double __y) { + return __builtin_isgreater(__x, __y); +} +__DEVICE__ bool isgreaterequal(float __x, float __y) { + return __builtin_isgreaterequal(__x, __y); +} +__DEVICE__ bool isgreaterequal(double __x, double __y) { + return __builtin_isgreaterequal(__x, __y); +} +__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ bool isless(float __x, float __y) { + return __builtin_isless(__x, __y); +} +__DEVICE__ bool isless(double __x, double __y) { + return __builtin_isless(__x, __y); +} +__DEVICE__ bool islessequal(float __x, float __y) { + return __builtin_islessequal(__x, __y); +} +__DEVICE__ bool islessequal(double __x, double __y) { + return __builtin_islessequal(__x, __y); +} +__DEVICE__ bool islessgreater(float __x, float __y) { + return __builtin_islessgreater(__x, __y); +} +__DEVICE__ bool islessgreater(double __x, double __y) { + return __builtin_islessgreater(__x, __y); +} +__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } +__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } +__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } +__DEVICE__ bool isunordered(float __x, float __y) { + return __builtin_isunordered(__x, __y); +} +__DEVICE__ bool isunordered(double __x, double __y) { + return __builtin_isunordered(__x, __y); +} +__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } +__DEVICE__ float pow(float __base, int __iexp) { + return ::powif(__base, __iexp); +} +__DEVICE__ double pow(double __base, int __iexp) { + return ::powi(__base, __iexp); +} +__DEVICE__ float remquo(float __x, float __y, int *__quo) { + return ::remquof(__x, __y, __quo); +} +__DEVICE__ float scalbln(float __x, long int __n) { + return ::scalblnf(__x, __n); +} +__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } +__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); } + +// Notably missing above is nexttoward. We omit it because +// ocml doesn't provide an implementation, and we don't want to be in the +// business of implementing tricky libm functions in this header. + +// Other functions. 
+__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) { + return __ocml_fma_f16(__x, __y, __z); +} +__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) { + return __ocml_pown_f16(__base, __iexp); +} + +// BEGIN DEF_FUN and HIP_OVERLOAD + +// BEGIN DEF_FUN + +#pragma push_macro("__DEF_FUN1") +#pragma push_macro("__DEF_FUN2") +#pragma push_macro("__DEF_FUN2_FI") + +// Define cmath functions with float argument and returns __retty. +#define __DEF_FUN1(__retty, __func) \ + __DEVICE__ \ + __retty __func(float __x) { return __func##f(__x); } + +// Define cmath functions with two float arguments and returns __retty. +#define __DEF_FUN2(__retty, __func) \ + __DEVICE__ \ + __retty __func(float __x, float __y) { return __func##f(__x, __y); } + +// Define cmath functions with a float and an int argument and returns __retty. +#define __DEF_FUN2_FI(__retty, __func) \ + __DEVICE__ \ + __retty __func(float __x, int __y) { return __func##f(__x, __y); } + +__DEF_FUN1(float, acos) +__DEF_FUN1(float, acosh) +__DEF_FUN1(float, asin) +__DEF_FUN1(float, asinh) +__DEF_FUN1(float, atan) +__DEF_FUN2(float, atan2) +__DEF_FUN1(float, atanh) +__DEF_FUN1(float, cbrt) +__DEF_FUN1(float, ceil) +__DEF_FUN2(float, copysign) +__DEF_FUN1(float, cos) +__DEF_FUN1(float, cosh) +__DEF_FUN1(float, erf) +__DEF_FUN1(float, erfc) +__DEF_FUN1(float, exp) +__DEF_FUN1(float, exp2) +__DEF_FUN1(float, expm1) +__DEF_FUN1(float, fabs) +__DEF_FUN2(float, fdim) +__DEF_FUN1(float, floor) +__DEF_FUN2(float, fmax) +__DEF_FUN2(float, fmin) +__DEF_FUN2(float, fmod) +__DEF_FUN2(float, hypot) +__DEF_FUN1(int, ilogb) +__DEF_FUN2_FI(float, ldexp) +__DEF_FUN1(float, lgamma) +__DEF_FUN1(float, log) +__DEF_FUN1(float, log10) +__DEF_FUN1(float, log1p) +__DEF_FUN1(float, log2) +__DEF_FUN1(float, logb) +__DEF_FUN1(long long, llrint) +__DEF_FUN1(long long, llround) +__DEF_FUN1(long, lrint) +__DEF_FUN1(long, lround) +__DEF_FUN1(float, nearbyint) +__DEF_FUN2(float, nextafter) +__DEF_FUN2(float, pow) +__DEF_FUN2(float, remainder) +__DEF_FUN1(float, rint) +__DEF_FUN1(float, round) +__DEF_FUN2_FI(float, scalbn) +__DEF_FUN1(float, sin) +__DEF_FUN1(float, sinh) +__DEF_FUN1(float, sqrt) +__DEF_FUN1(float, tan) +__DEF_FUN1(float, tanh) +__DEF_FUN1(float, tgamma) +__DEF_FUN1(float, trunc) + +#pragma pop_macro("__DEF_FUN1") +#pragma pop_macro("__DEF_FUN2") +#pragma pop_macro("__DEF_FUN2_FI") + +// END DEF_FUN + +// BEGIN HIP_OVERLOAD + +#pragma push_macro("__HIP_OVERLOAD1") +#pragma push_macro("__HIP_OVERLOAD2") + +// __hip_enable_if::type is a type function which returns __T if __B is true. +template struct __hip_enable_if {}; + +template struct __hip_enable_if { typedef __T type; }; + +// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to +// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with +// floor(double). +#define __HIP_OVERLOAD1(__retty, __fn) \ + template \ + __DEVICE__ typename __hip_enable_if::is_integer, \ + __retty>::type \ + __fn(__T __x) { \ + return ::__fn((double)__x); \ + } + +// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double +// or integer argument to avoid compilation error due to ambibuity. e.g. +// max(5.0f, 6.0) is resolved with max(double, double). 
+#define __HIP_OVERLOAD2(__retty, __fn)                                         \
+  template <typename __T1, typename __T2>                                      \
+  __DEVICE__                                                                   \
+      typename __hip_enable_if<std::numeric_limits<__T1>::is_specialized &&    \
+                                   std::numeric_limits<__T2>::is_specialized,  \
+                               __retty>::type                                  \
+      __fn(__T1 __x, __T2 __y) {                                               \
+    return __fn((double)__x, (double)__y);                                     \
+  }
+
+__HIP_OVERLOAD1(double, abs)
+__HIP_OVERLOAD1(double, acos)
+__HIP_OVERLOAD1(double, acosh)
+__HIP_OVERLOAD1(double, asin)
+__HIP_OVERLOAD1(double, asinh)
+__HIP_OVERLOAD1(double, atan)
+__HIP_OVERLOAD2(double, atan2)
+__HIP_OVERLOAD1(double, atanh)
+__HIP_OVERLOAD1(double, cbrt)
+__HIP_OVERLOAD1(double, ceil)
+__HIP_OVERLOAD2(double, copysign)
+__HIP_OVERLOAD1(double, cos)
+__HIP_OVERLOAD1(double, cosh)
+__HIP_OVERLOAD1(double, erf)
+__HIP_OVERLOAD1(double, erfc)
+__HIP_OVERLOAD1(double, exp)
+__HIP_OVERLOAD1(double, exp2)
+__HIP_OVERLOAD1(double, expm1)
+__HIP_OVERLOAD1(double, fabs)
+__HIP_OVERLOAD2(double, fdim)
+__HIP_OVERLOAD1(double, floor)
+__HIP_OVERLOAD2(double, fmax)
+__HIP_OVERLOAD2(double, fmin)
+__HIP_OVERLOAD2(double, fmod)
+__HIP_OVERLOAD1(int, fpclassify)
+__HIP_OVERLOAD2(double, hypot)
+__HIP_OVERLOAD1(int, ilogb)
+__HIP_OVERLOAD1(bool, isfinite)
+__HIP_OVERLOAD2(bool, isgreater)
+__HIP_OVERLOAD2(bool, isgreaterequal)
+__HIP_OVERLOAD1(bool, isinf)
+__HIP_OVERLOAD2(bool, isless)
+__HIP_OVERLOAD2(bool, islessequal)
+__HIP_OVERLOAD2(bool, islessgreater)
+__HIP_OVERLOAD1(bool, isnan)
+__HIP_OVERLOAD1(bool, isnormal)
+__HIP_OVERLOAD2(bool, isunordered)
+__HIP_OVERLOAD1(double, lgamma)
+__HIP_OVERLOAD1(double, log)
+__HIP_OVERLOAD1(double, log10)
+__HIP_OVERLOAD1(double, log1p)
+__HIP_OVERLOAD1(double, log2)
+__HIP_OVERLOAD1(double, logb)
+__HIP_OVERLOAD1(long long, llrint)
+__HIP_OVERLOAD1(long long, llround)
+__HIP_OVERLOAD1(long, lrint)
+__HIP_OVERLOAD1(long, lround)
+__HIP_OVERLOAD1(double, nearbyint)
+__HIP_OVERLOAD2(double, nextafter)
+__HIP_OVERLOAD2(double, pow)
+__HIP_OVERLOAD2(double, remainder)
+__HIP_OVERLOAD1(double, rint)
+__HIP_OVERLOAD1(double, round)
+__HIP_OVERLOAD1(bool, signbit)
+__HIP_OVERLOAD1(double, sin)
+__HIP_OVERLOAD1(double, sinh)
+__HIP_OVERLOAD1(double, sqrt)
+__HIP_OVERLOAD1(double, tan)
+__HIP_OVERLOAD1(double, tanh)
+__HIP_OVERLOAD1(double, tgamma)
+__HIP_OVERLOAD1(double, trunc)
+
+// Overload these but don't add them to std, they are not part of cmath.
+__HIP_OVERLOAD2(double, max)
+__HIP_OVERLOAD2(double, min)
+
+// Additional Overloads that don't quite match HIP_OVERLOAD.
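A hypothetical device-side usage of the generated overloads, to make the resolution rules concrete (the kernel is illustrative only and is not part of the header):

__global__ void __demo_overloads(double *__out) {
  __out[0] = max(5.0f, 6.0); // mixed float/double: picks max(double, double)
  __out[1] = min(2, 3.5);    // mixed int/double: picks min(double, double)
  __out[2] = floor(7);       // integral argument: floor(double) via __HIP_OVERLOAD1
}

The handful of functions defined next take three arguments or an out-pointer, so they cannot be stamped out by __HIP_OVERLOAD1/2 and are written out by hand.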
+template <typename __T1, typename __T2, typename __T3>
+__DEVICE__
+    typename __hip_enable_if<std::numeric_limits<__T1>::is_specialized &&
+                                 std::numeric_limits<__T2>::is_specialized &&
+                                 std::numeric_limits<__T3>::is_specialized,
+                             double>::type
+    fma(__T1 __x, __T2 __y, __T3 __z) {
+  return ::fma((double)__x, (double)__y, (double)__z);
+}
+
+template <typename __T>
+__DEVICE__
+    typename __hip_enable_if<std::numeric_limits<__T>::is_integer, double>::type
+    frexp(__T __x, int *__exp) {
+  return ::frexp((double)__x, __exp);
+}
+
+template <typename __T>
+__DEVICE__
+    typename __hip_enable_if<std::numeric_limits<__T>::is_integer, double>::type
+    ldexp(__T __x, int __exp) {
+  return ::ldexp((double)__x, __exp);
+}
+
+template <typename __T>
+__DEVICE__
+    typename __hip_enable_if<std::numeric_limits<__T>::is_integer, double>::type
+    modf(__T __x, double *__exp) {
+  return ::modf((double)__x, __exp);
+}
+
+template <typename __T1, typename __T2>
+__DEVICE__
+    typename __hip_enable_if<std::numeric_limits<__T1>::is_specialized &&
+                                 std::numeric_limits<__T2>::is_specialized,
+                             double>::type
+    remquo(__T1 __x, __T2 __y, int *__quo) {
+  return ::remquo((double)__x, (double)__y, __quo);
+}
+
+template <typename __T>
+__DEVICE__
+    typename __hip_enable_if<std::numeric_limits<__T>::is_integer, double>::type
+    scalbln(__T __x, long int __exp) {
+  return ::scalbln((double)__x, __exp);
+}
+
+template <typename __T>
+__DEVICE__
+    typename __hip_enable_if<std::numeric_limits<__T>::is_integer, double>::type
+    scalbn(__T __x, int __exp) {
+  return ::scalbn((double)__x, __exp);
+}
+
+#pragma pop_macro("__HIP_OVERLOAD1")
+#pragma pop_macro("__HIP_OVERLOAD2")
+
+// END HIP_OVERLOAD
+
+// END DEF_FUN and HIP_OVERLOAD
+
+#endif // defined(__cplusplus)
+
+// Define these overloads inside the namespace our standard library uses.
+#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
+_LIBCPP_BEGIN_NAMESPACE_STD
+#else
+namespace std {
+#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+#endif
+#endif
+
+// Pull the new overloads we defined above into namespace std.
+// using ::abs;
+using ::acos;
+using ::acosh;
+using ::asin;
+using ::asinh;
+using ::atan;
+using ::atan2;
+using ::atanh;
+using ::cbrt;
+using ::ceil;
+using ::copysign;
+using ::cos;
+using ::cosh;
+using ::erf;
+using ::erfc;
+using ::exp;
+using ::exp2;
+using ::expm1;
+using ::fabs;
+using ::fdim;
+using ::floor;
+using ::fma;
+using ::fmax;
+using ::fmin;
+using ::fmod;
+using ::fpclassify;
+using ::frexp;
+using ::hypot;
+using ::ilogb;
+using ::isfinite;
+using ::isgreater;
+using ::isgreaterequal;
+using ::isless;
+using ::islessequal;
+using ::islessgreater;
+using ::isnormal;
+using ::isunordered;
+using ::ldexp;
+using ::lgamma;
+using ::llrint;
+using ::llround;
+using ::log;
+using ::log10;
+using ::log1p;
+using ::log2;
+using ::logb;
+using ::lrint;
+using ::lround;
+using ::modf;
+// using ::nan;
+// using ::nanf;
+// using ::nanl; - This is not yet defined.
+using ::nearbyint;
+using ::nextafter;
+// using ::nexttoward; - Omit this since we do not have a definition.
+using ::pow;
+using ::remainder;
+using ::remquo;
+using ::rint;
+using ::round;
+using ::scalbln;
+using ::scalbn;
+using ::signbit;
+using ::sin;
+using ::sinh;
+using ::sqrt;
+using ::tan;
+using ::tanh;
+using ::tgamma;
+using ::trunc;
+
+// Well this is fun: We need to pull these symbols in for libc++, but we can't
+// pull them in with libstdc++, because its ::isinf and ::isnan are different
+// than its std::isinf and std::isnan.
+#ifndef __GLIBCXX__
+using ::isinf;
+using ::isnan;
+#endif
+
+// Finally, pull the "foobarf" functions that HIP defines into std.
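One more note on the __GLIBCXX__ guard a few lines up, before the float-suffixed names are pulled in below: with libstdc++ the global ::isinf and ::isnan inherited from C's math.h have historically returned int, while std::isinf and std::isnan return bool. The sketch below shows the kind of signature pair involved (hypothetical declarations for illustration; the exact shapes vary by glibc and libstdc++ version):

int isinf(double __x);  // C math.h heritage in the global namespace.
namespace std {
bool isinf(double __x); // C++ <cmath> overload set.
}
// A `using ::isinf;` inside namespace std would make the int-returning form
// visible there too, changing what std::isinf(x) means for callers, so the
// header only re-exports these two names under libc++.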
+using ::acosf; +using ::acoshf; +using ::asinf; +using ::asinhf; +using ::atan2f; +using ::atanf; +using ::atanhf; +using ::cbrtf; +using ::ceilf; +using ::copysignf; +using ::cosf; +using ::coshf; +using ::erfcf; +using ::erff; +using ::exp2f; +using ::expf; +using ::expm1f; +using ::fabsf; +using ::fdimf; +using ::floorf; +using ::fmaf; +using ::fmaxf; +using ::fminf; +using ::fmodf; +using ::frexpf; +using ::hypotf; +using ::ilogbf; +using ::ldexpf; +using ::lgammaf; +using ::llrintf; +using ::llroundf; +using ::log10f; +using ::log1pf; +using ::log2f; +using ::logbf; +using ::logf; +using ::lrintf; +using ::lroundf; +using ::modff; +using ::nearbyintf; +using ::nextafterf; +// using ::nexttowardf; - Omit this since we do not have a definition. +using ::powf; +using ::remainderf; +using ::remquof; +using ::rintf; +using ::roundf; +using ::scalblnf; +using ::scalbnf; +using ::sinf; +using ::sinhf; +using ::sqrtf; +using ::tanf; +using ::tanhf; +using ::tgammaf; +using ::truncf; + +#ifdef _LIBCPP_END_NAMESPACE_STD +_LIBCPP_END_NAMESPACE_STD +#else +#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_END_NAMESPACE_VERSION +#endif +} // namespace std +#endif + +#pragma pop_macro("__DEVICE__") + +#endif // __CLANG_HIP_CMATH_H__ diff --git a/clang/lib/Headers/__clang_hip_libdevice_declares.h b/clang/lib/Headers/__clang_hip_libdevice_declares.h index de2f82cd8eca6..ac98907ad5de6 100644 --- a/clang/lib/Headers/__clang_hip_libdevice_declares.h +++ b/clang/lib/Headers/__clang_hip_libdevice_declares.h @@ -129,10 +129,10 @@ __device__ __attribute__((const)) float __ocml_div_rte_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtn_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtp_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtz_f32(float, float); -__device__ __attribute__((const)) float __ocml_sqrt_rte_f32(float, float); -__device__ __attribute__((const)) float __ocml_sqrt_rtn_f32(float, float); -__device__ __attribute__((const)) float __ocml_sqrt_rtp_f32(float, float); -__device__ __attribute__((const)) float __ocml_sqrt_rtz_f32(float, float); +__device__ __attribute__((const)) float __ocml_sqrt_rte_f32(float); +__device__ __attribute__((const)) float __ocml_sqrt_rtn_f32(float); +__device__ __attribute__((const)) float __ocml_sqrt_rtp_f32(float); +__device__ __attribute__((const)) float __ocml_sqrt_rtz_f32(float); __device__ __attribute__((const)) float __ocml_fma_rte_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtn_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtp_f32(float, float, float); @@ -256,10 +256,10 @@ __device__ __attribute__((const)) double __ocml_div_rte_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtn_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtp_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtz_f64(double, double); -__device__ __attribute__((const)) double __ocml_sqrt_rte_f64(double, double); -__device__ __attribute__((const)) double __ocml_sqrt_rtn_f64(double, double); -__device__ __attribute__((const)) double __ocml_sqrt_rtp_f64(double, double); -__device__ __attribute__((const)) double __ocml_sqrt_rtz_f64(double, double); +__device__ __attribute__((const)) double __ocml_sqrt_rte_f64(double); +__device__ __attribute__((const)) double __ocml_sqrt_rtn_f64(double); +__device__ __attribute__((const)) double __ocml_sqrt_rtp_f64(double); +__device__ __attribute__((const)) 
double __ocml_sqrt_rtz_f64(double);
 __device__ __attribute__((const)) double __ocml_fma_rte_f64(double, double,
                                                             double);
 __device__ __attribute__((const)) double __ocml_fma_rtn_f64(double, double,
diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index b72bb40ccdb67..f2365e8844fed 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -1,4 +1,4 @@
-/*===---- __clang_hip_math.h - HIP math decls -------------------------------===
+/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
  *
  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  * See https://llvm.org/LICENSE.txt for license information.
@@ -6,25 +6,32 @@
  *
  *===-----------------------------------------------------------------------===
  */
-
 #ifndef __CLANG_HIP_MATH_H__
 #define __CLANG_HIP_MATH_H__
 
+#if !defined(__HIP__)
+#error "This file is for HIP and OpenMP AMDGCN device compilation only."
+#endif
+
+#if defined(__cplusplus)
 #include <algorithm>
+#endif
 #include <limits.h>
-#include <limits>
 #include <stdint.h>
-#include <utility>
+
 #pragma push_macro("__DEVICE__")
-#pragma push_macro("__RETURN_TYPE")
+#define __DEVICE__ static __device__ inline __attribute__((always_inline))
 
-// to be consistent with __clang_cuda_math_forward_declares
-#define __DEVICE__ static __device__
+// A few functions return bool type starting only in C++11.
+#pragma push_macro("__RETURN_TYPE")
+#if defined(__cplusplus)
 #define __RETURN_TYPE bool
+#else
+#define __RETURN_TYPE int
+#endif
 
 #if defined (__cplusplus) && __cplusplus < 201103L
-//emulate static_assert on type sizes
+// emulate static_assert on type sizes
 template<bool>
 struct __compare_result{};
 template<>
@@ -33,26 +40,23 @@ struct __compare_result<true> {
 };
 
 __DEVICE__
-inline void __suppress_unused_warning(bool b) {};
-template <unsigned int A, unsigned int B>
-__DEVICE__
-inline void __static_assert_equal_size() {
-  __suppress_unused_warning(__compare_result<A == B>::valid);
+void __suppress_unused_warning(bool b){};
+template <unsigned int A, unsigned int B>
+__DEVICE__ void __static_assert_equal_size() {
+  __suppress_unused_warning(__compare_result<A == B>::valid);
 }
 
 #define __static_assert_type_size_equal(A, B) \
   __static_assert_equal_size<A, B>()
 
 #else
-
 #define __static_assert_type_size_equal(A,B) \
   static_assert((A) == (B), "")
 
 #endif
-
 __DEVICE__
-inline uint64_t __make_mantissa_base8(const char *__tagp) {
+uint64_t __make_mantissa_base8(const char *__tagp) {
   uint64_t __r = 0;
   while (__tagp) {
     char __tmp = *__tagp;
@@ -69,7 +73,7 @@ inline uint64_t __make_mantissa_base8(const char *__tagp) {
 }
 
 __DEVICE__
-inline uint64_t __make_mantissa_base10(const char *__tagp) {
+uint64_t __make_mantissa_base10(const char *__tagp) {
   uint64_t __r = 0;
   while (__tagp) {
     char __tmp = *__tagp;
@@ -86,7 +90,7 @@ inline uint64_t __make_mantissa_base10(const char *__tagp) {
 }
 
 __DEVICE__
-inline uint64_t __make_mantissa_base16(const char *__tagp) {
+uint64_t __make_mantissa_base16(const char *__tagp) {
   uint64_t __r = 0;
   while (__tagp) {
     char __tmp = *__tagp;
@@ -107,7 +111,7 @@ inline uint64_t __make_mantissa_base16(const char *__tagp) {
 }
 
 __DEVICE__
-inline uint64_t __make_mantissa(const char *__tagp) {
+uint64_t __make_mantissa(const char *__tagp) {
   if (!__tagp)
     return 0u;
 
@@ -124,80 +128,124 @@ inline uint64_t __make_mantissa(const char *__tagp) {
 }
 
 // BEGIN FLOAT
-#ifdef __cplusplus
+#if defined(__cplusplus)
+__DEVICE__
+int abs(int __x) {
+  int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
+  return (__x ^ __sgn) - __sgn;
+}
+__DEVICE__
+long labs(long __x) {
+  long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
+  return (__x
^ __sgn) - __sgn; +} __DEVICE__ -inline float abs(float __x) { return __ocml_fabs_f32(__x); } +long long llabs(long long __x) { + long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); + return (__x ^ __sgn) - __sgn; +} #endif + __DEVICE__ -inline float acosf(float __x) { return __ocml_acos_f32(__x); } +float acosf(float __x) { return __ocml_acos_f32(__x); } + __DEVICE__ -inline float acoshf(float __x) { return __ocml_acosh_f32(__x); } +float acoshf(float __x) { return __ocml_acosh_f32(__x); } + __DEVICE__ -inline float asinf(float __x) { return __ocml_asin_f32(__x); } +float asinf(float __x) { return __ocml_asin_f32(__x); } + __DEVICE__ -inline float asinhf(float __x) { return __ocml_asinh_f32(__x); } +float asinhf(float __x) { return __ocml_asinh_f32(__x); } + __DEVICE__ -inline float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } +float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } + __DEVICE__ -inline float atanf(float __x) { return __ocml_atan_f32(__x); } +float atanf(float __x) { return __ocml_atan_f32(__x); } + __DEVICE__ -inline float atanhf(float __x) { return __ocml_atanh_f32(__x); } +float atanhf(float __x) { return __ocml_atanh_f32(__x); } + __DEVICE__ -inline float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } +float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } + __DEVICE__ -inline float ceilf(float __x) { return __ocml_ceil_f32(__x); } +float ceilf(float __x) { return __ocml_ceil_f32(__x); } + __DEVICE__ -inline float copysignf(float __x, float __y) { - return __ocml_copysign_f32(__x, __y); -} +float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); } + __DEVICE__ -inline float cosf(float __x) { return __ocml_cos_f32(__x); } +float cosf(float __x) { return __ocml_cos_f32(__x); } + __DEVICE__ -inline float coshf(float __x) { return __ocml_cosh_f32(__x); } +float coshf(float __x) { return __ocml_cosh_f32(__x); } + __DEVICE__ -inline float cospif(float __x) { return __ocml_cospi_f32(__x); } +float cospif(float __x) { return __ocml_cospi_f32(__x); } + __DEVICE__ -inline float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } +float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } + __DEVICE__ -inline float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } +float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } + __DEVICE__ -inline float erfcf(float __x) { return __ocml_erfc_f32(__x); } +float erfcf(float __x) { return __ocml_erfc_f32(__x); } + __DEVICE__ -inline float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); } +float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); } + __DEVICE__ -inline float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } +float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } + __DEVICE__ -inline float erff(float __x) { return __ocml_erf_f32(__x); } +float erff(float __x) { return __ocml_erf_f32(__x); } + __DEVICE__ -inline float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } +float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } + __DEVICE__ -inline float exp10f(float __x) { return __ocml_exp10_f32(__x); } +float exp10f(float __x) { return __ocml_exp10_f32(__x); } + __DEVICE__ -inline float exp2f(float __x) { return __ocml_exp2_f32(__x); } +float exp2f(float __x) { return __ocml_exp2_f32(__x); } + __DEVICE__ -inline float expf(float __x) { return __ocml_exp_f32(__x); } +float expf(float __x) { return __ocml_exp_f32(__x); } + __DEVICE__ -inline float expm1f(float __x) { return __ocml_expm1_f32(__x); } +float 
expm1f(float __x) { return __ocml_expm1_f32(__x); } + __DEVICE__ -inline float fabsf(float __x) { return __ocml_fabs_f32(__x); } +float fabsf(float __x) { return __ocml_fabs_f32(__x); } + __DEVICE__ -inline float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } +float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } + __DEVICE__ -inline float fdividef(float __x, float __y) { return __x / __y; } +float fdividef(float __x, float __y) { return __x / __y; } + __DEVICE__ -inline float floorf(float __x) { return __ocml_floor_f32(__x); } +float floorf(float __x) { return __ocml_floor_f32(__x); } + __DEVICE__ -inline float fmaf(float __x, float __y, float __z) { +float fmaf(float __x, float __y, float __z) { return __ocml_fma_f32(__x, __y, __z); } + __DEVICE__ -inline float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); } +float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); } + __DEVICE__ -inline float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); } +float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); } + __DEVICE__ -inline float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } +float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } + __DEVICE__ -inline float frexpf(float __x, int *__nptr) { +float frexpf(float __x, int *__nptr) { int __tmp; float __r = __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp); @@ -205,24 +253,31 @@ inline float frexpf(float __x, int *__nptr) { return __r; } + __DEVICE__ -inline float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } +float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } + __DEVICE__ -inline int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } +int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } + __DEVICE__ -inline __RETURN_TYPE isfinite(float __x) { return __ocml_isfinite_f32(__x); } +__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); } + __DEVICE__ -inline __RETURN_TYPE isinf(float __x) { return __ocml_isinf_f32(__x); } +__RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); } + __DEVICE__ -inline __RETURN_TYPE isnan(float __x) { return __ocml_isnan_f32(__x); } +__RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); } + __DEVICE__ -inline float j0f(float __x) { return __ocml_j0_f32(__x); } +float j0f(float __x) { return __ocml_j0_f32(__x); } + __DEVICE__ -inline float j1f(float __x) { return __ocml_j1_f32(__x); } +float j1f(float __x) { return __ocml_j1_f32(__x); } + __DEVICE__ -inline float jnf(int __n, - float __x) { // TODO: we could use Ahmes multiplication - // and the Miller & Brown algorithm +float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication + // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. 
if (__n == 0) @@ -240,46 +295,58 @@ inline float jnf(int __n, return __x1; } + __DEVICE__ -inline float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); } +float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); } + __DEVICE__ -inline float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } +float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } + __DEVICE__ -inline long long int llrintf(float __x) { return __ocml_rint_f32(__x); } +long long int llrintf(float __x) { return __ocml_rint_f32(__x); } + __DEVICE__ -inline long long int llroundf(float __x) { return __ocml_round_f32(__x); } +long long int llroundf(float __x) { return __ocml_round_f32(__x); } + __DEVICE__ -inline float log10f(float __x) { return __ocml_log10_f32(__x); } +float log10f(float __x) { return __ocml_log10_f32(__x); } + __DEVICE__ -inline float log1pf(float __x) { return __ocml_log1p_f32(__x); } +float log1pf(float __x) { return __ocml_log1p_f32(__x); } + __DEVICE__ -inline float log2f(float __x) { return __ocml_log2_f32(__x); } +float log2f(float __x) { return __ocml_log2_f32(__x); } + __DEVICE__ -inline float logbf(float __x) { return __ocml_logb_f32(__x); } +float logbf(float __x) { return __ocml_logb_f32(__x); } + __DEVICE__ -inline float logf(float __x) { return __ocml_log_f32(__x); } +float logf(float __x) { return __ocml_log_f32(__x); } + __DEVICE__ -inline long int lrintf(float __x) { return __ocml_rint_f32(__x); } +long int lrintf(float __x) { return __ocml_rint_f32(__x); } + __DEVICE__ -inline long int lroundf(float __x) { return __ocml_round_f32(__x); } +long int lroundf(float __x) { return __ocml_round_f32(__x); } + __DEVICE__ -inline float modff(float __x, float *__iptr) { +float modff(float __x, float *__iptr) { float __tmp; float __r = __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__iptr = __tmp; - return __r; } + __DEVICE__ -inline float nanf(const char *__tagp) { +float nanf(const char *__tagp) { union { float val; struct ieee_float { - uint32_t mantissa : 22; - uint32_t quiet : 1; - uint32_t exponent : 8; - uint32_t sign : 1; + unsigned int mantissa : 22; + unsigned int quiet : 1; + unsigned int exponent : 8; + unsigned int sign : 1; } bits; } __tmp; __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); @@ -291,28 +358,34 @@ inline float nanf(const char *__tagp) { return __tmp.val; } + __DEVICE__ -inline float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); } +float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); } + __DEVICE__ -inline float nextafterf(float __x, float __y) { +float nextafterf(float __x, float __y) { return __ocml_nextafter_f32(__x, __y); } + __DEVICE__ -inline float norm3df(float __x, float __y, float __z) { +float norm3df(float __x, float __y, float __z) { return __ocml_len3_f32(__x, __y, __z); } + __DEVICE__ -inline float norm4df(float __x, float __y, float __z, float __w) { +float norm4df(float __x, float __y, float __z, float __w) { return __ocml_len4_f32(__x, __y, __z, __w); } + __DEVICE__ -inline float normcdff(float __x) { return __ocml_ncdf_f32(__x); } +float normcdff(float __x) { return __ocml_ncdf_f32(__x); } + __DEVICE__ -inline float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } +float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } + __DEVICE__ -inline float -normf(int __dim, - const float *__a) { // TODO: placeholder until OCML adds support. +float normf(int __dim, + const float *__a) { // TODO: placeholder until OCML adds support. 
float __r = 0; while (__dim--) { __r += __a[0] * __a[0]; @@ -321,18 +394,23 @@ normf(int __dim, return __ocml_sqrt_f32(__r); } + __DEVICE__ -inline float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } +float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } + __DEVICE__ -inline float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); } +float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); } + __DEVICE__ -inline float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } +float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } + __DEVICE__ -inline float remainderf(float __x, float __y) { +float remainderf(float __x, float __y) { return __ocml_remainder_f32(__x, __y); } + __DEVICE__ -inline float remquof(float __x, float __y, int *__quo) { +float remquof(float __x, float __y, int *__quo) { int __tmp; float __r = __ocml_remquo_f32( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); @@ -340,25 +418,26 @@ inline float remquof(float __x, float __y, int *__quo) { return __r; } + __DEVICE__ -inline float rhypotf(float __x, float __y) { - return __ocml_rhypot_f32(__x, __y); -} +float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); } + __DEVICE__ -inline float rintf(float __x) { return __ocml_rint_f32(__x); } +float rintf(float __x) { return __ocml_rint_f32(__x); } + __DEVICE__ -inline float rnorm3df(float __x, float __y, float __z) { +float rnorm3df(float __x, float __y, float __z) { return __ocml_rlen3_f32(__x, __y, __z); } __DEVICE__ -inline float rnorm4df(float __x, float __y, float __z, float __w) { +float rnorm4df(float __x, float __y, float __z, float __w) { return __ocml_rlen4_f32(__x, __y, __z, __w); } + __DEVICE__ -inline float -rnormf(int __dim, - const float *__a) { // TODO: placeholder until OCML adds support. +float rnormf(int __dim, + const float *__a) { // TODO: placeholder until OCML adds support. float __r = 0; while (__dim--) { __r += __a[0] * __a[0]; @@ -367,59 +446,74 @@ rnormf(int __dim, return __ocml_rsqrt_f32(__r); } + __DEVICE__ -inline float roundf(float __x) { return __ocml_round_f32(__x); } +float roundf(float __x) { return __ocml_round_f32(__x); } + __DEVICE__ -inline float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } +float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } + __DEVICE__ -inline float scalblnf(float __x, long int __n) { +float scalblnf(float __x, long int __n) { return (__n < INT_MAX) ? 
__ocml_scalbn_f32(__x, __n) : __ocml_scalb_f32(__x, __n); } + __DEVICE__ -inline float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); } +float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); } + __DEVICE__ -inline __RETURN_TYPE signbit(float __x) { return __ocml_signbit_f32(__x); } +__RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); } + __DEVICE__ -inline void sincosf(float __x, float *__sinptr, float *__cosptr) { +void sincosf(float __x, float *__sinptr, float *__cosptr) { float __tmp; - *__sinptr = __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; } + __DEVICE__ -inline void sincospif(float __x, float *__sinptr, float *__cosptr) { +void sincospif(float __x, float *__sinptr, float *__cosptr) { float __tmp; - *__sinptr = __ocml_sincospi_f32( __x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; } + __DEVICE__ -inline float sinf(float __x) { return __ocml_sin_f32(__x); } +float sinf(float __x) { return __ocml_sin_f32(__x); } + __DEVICE__ -inline float sinhf(float __x) { return __ocml_sinh_f32(__x); } +float sinhf(float __x) { return __ocml_sinh_f32(__x); } + __DEVICE__ -inline float sinpif(float __x) { return __ocml_sinpi_f32(__x); } +float sinpif(float __x) { return __ocml_sinpi_f32(__x); } + __DEVICE__ -inline float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } +float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } + __DEVICE__ -inline float tanf(float __x) { return __ocml_tan_f32(__x); } +float tanf(float __x) { return __ocml_tan_f32(__x); } + __DEVICE__ -inline float tanhf(float __x) { return __ocml_tanh_f32(__x); } +float tanhf(float __x) { return __ocml_tanh_f32(__x); } + __DEVICE__ -inline float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } +float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } + __DEVICE__ -inline float truncf(float __x) { return __ocml_trunc_f32(__x); } +float truncf(float __x) { return __ocml_trunc_f32(__x); } + __DEVICE__ -inline float y0f(float __x) { return __ocml_y0_f32(__x); } +float y0f(float __x) { return __ocml_y0_f32(__x); } + __DEVICE__ -inline float y1f(float __x) { return __ocml_y1_f32(__x); } +float y1f(float __x) { return __ocml_y1_f32(__x); } + __DEVICE__ -inline float ynf(int __n, - float __x) { // TODO: we could use Ahmes multiplication - // and the Miller & Brown algorithm +float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication + // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. 
@@ -440,292 +534,328 @@ inline float ynf(int __n, } // BEGIN INTRINSICS + __DEVICE__ -inline float __cosf(float __x) { return __ocml_native_cos_f32(__x); } +float __cosf(float __x) { return __ocml_native_cos_f32(__x); } + __DEVICE__ -inline float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } +float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } + __DEVICE__ -inline float __expf(float __x) { return __ocml_native_exp_f32(__x); } +float __expf(float __x) { return __ocml_native_exp_f32(__x); } + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fadd_rd(float __x, float __y) { - return __ocml_add_rtn_f32(__x, __y); -} +float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } #endif __DEVICE__ -inline float __fadd_rn(float __x, float __y) { return __x + __y; } +float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fadd_ru(float __x, float __y) { - return __ocml_add_rtp_f32(__x, __y); -} +float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } + __DEVICE__ -inline float __fadd_rz(float __x, float __y) { - return __ocml_add_rtz_f32(__x, __y); -} +float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); } + __DEVICE__ -inline float __fdiv_rd(float __x, float __y) { - return __ocml_div_rtn_f32(__x, __y); -} +float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } #endif __DEVICE__ -inline float __fdiv_rn(float __x, float __y) { return __x / __y; } +float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fdiv_ru(float __x, float __y) { - return __ocml_div_rtp_f32(__x, __y); -} +float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } + __DEVICE__ -inline float __fdiv_rz(float __x, float __y) { - return __ocml_div_rtz_f32(__x, __y); -} +float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } #endif __DEVICE__ -inline float __fdividef(float __x, float __y) { return __x / __y; } +float __fdividef(float __x, float __y) { return __x / __y; } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fmaf_rd(float __x, float __y, float __z) { +float __fmaf_rd(float __x, float __y, float __z) { return __ocml_fma_rtn_f32(__x, __y, __z); } #endif __DEVICE__ -inline float __fmaf_rn(float __x, float __y, float __z) { - return __ocml_fma_f32(__x, __y, __z); +float __fmaf_rn(float __x, float __y, float __z) { + return __ocml_fma_rte_f32(__x, __y, __z); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fmaf_ru(float __x, float __y, float __z) { +float __fmaf_ru(float __x, float __y, float __z) { return __ocml_fma_rtp_f32(__x, __y, __z); } + __DEVICE__ -inline float __fmaf_rz(float __x, float __y, float __z) { +float __fmaf_rz(float __x, float __y, float __z) { return __ocml_fma_rtz_f32(__x, __y, __z); } + __DEVICE__ -inline float __fmul_rd(float __x, float __y) { - return __ocml_mul_rtn_f32(__x, __y); -} +float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } #endif __DEVICE__ -inline float __fmul_rn(float __x, float __y) { return __x * __y; } +float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fmul_ru(float __x, float __y) { - return __ocml_mul_rtp_f32(__x, __y); -} +float __fmul_ru(float __x, float __y) 
{ return __ocml_mul_rtp_f32(__x, __y); } + __DEVICE__ -inline float __fmul_rz(float __x, float __y) { - return __ocml_mul_rtz_f32(__x, __y); -} +float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } + __DEVICE__ -inline float __frcp_rd(float __x) { return __llvm_amdgcn_rcp_f32(__x); } +float __frcp_rd(float __x) { return __llvm_amdgcn_rcp_f32(__x); } #endif __DEVICE__ -inline float __frcp_rn(float __x) { return __llvm_amdgcn_rcp_f32(__x); } +float __frcp_rn(float __x) { return __llvm_amdgcn_rcp_f32(__x); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __frcp_ru(float __x) { return __llvm_amdgcn_rcp_f32(__x); } +float __frcp_ru(float __x) { return __llvm_amdgcn_rcp_f32(__x); } + __DEVICE__ -inline float __frcp_rz(float __x) { return __llvm_amdgcn_rcp_f32(__x); } +float __frcp_rz(float __x) { return __llvm_amdgcn_rcp_f32(__x); } #endif __DEVICE__ -inline float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); } +float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } +float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } #endif __DEVICE__ -inline float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } +float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } +float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } + __DEVICE__ -inline float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } +float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } + __DEVICE__ -inline float __fsub_rd(float __x, float __y) { - return __ocml_sub_rtn_f32(__x, __y); -} +float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } #endif __DEVICE__ -inline float __fsub_rn(float __x, float __y) { return __x - __y; } +float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fsub_ru(float __x, float __y) { - return __ocml_sub_rtp_f32(__x, __y); -} +float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } + __DEVICE__ -inline float __fsub_rz(float __x, float __y) { - return __ocml_sub_rtz_f32(__x, __y); -} +float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } #endif __DEVICE__ -inline float __log10f(float __x) { return __ocml_native_log10_f32(__x); } +float __log10f(float __x) { return __ocml_native_log10_f32(__x); } + __DEVICE__ -inline float __log2f(float __x) { return __ocml_native_log2_f32(__x); } +float __log2f(float __x) { return __ocml_native_log2_f32(__x); } + __DEVICE__ -inline float __logf(float __x) { return __ocml_native_log_f32(__x); } +float __logf(float __x) { return __ocml_native_log_f32(__x); } + __DEVICE__ -inline float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } +float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } + __DEVICE__ -inline float __saturatef(float __x) { - return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); -} +float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 
1 : __x); } + __DEVICE__ -inline void __sincosf(float __x, float *__sinptr, float *__cosptr) { +void __sincosf(float __x, float *__sinptr, float *__cosptr) { *__sinptr = __ocml_native_sin_f32(__x); *__cosptr = __ocml_native_cos_f32(__x); } + __DEVICE__ -inline float __sinf(float __x) { return __ocml_native_sin_f32(__x); } +float __sinf(float __x) { return __ocml_native_sin_f32(__x); } + __DEVICE__ -inline float __tanf(float __x) { return __ocml_tan_f32(__x); } +float __tanf(float __x) { return __ocml_tan_f32(__x); } // END INTRINSICS // END FLOAT // BEGIN DOUBLE -#ifdef __cplusplus -__DEVICE__ -inline double abs(double __x) { return __ocml_fabs_f64(__x); } -#endif __DEVICE__ -inline double acos(double __x) { return __ocml_acos_f64(__x); } +double acos(double __x) { return __ocml_acos_f64(__x); } + __DEVICE__ -inline double acosh(double __x) { return __ocml_acosh_f64(__x); } +double acosh(double __x) { return __ocml_acosh_f64(__x); } + __DEVICE__ -inline double asin(double __x) { return __ocml_asin_f64(__x); } +double asin(double __x) { return __ocml_asin_f64(__x); } + __DEVICE__ -inline double asinh(double __x) { return __ocml_asinh_f64(__x); } +double asinh(double __x) { return __ocml_asinh_f64(__x); } + __DEVICE__ -inline double atan(double __x) { return __ocml_atan_f64(__x); } +double atan(double __x) { return __ocml_atan_f64(__x); } + __DEVICE__ -inline double atan2(double __x, double __y) { - return __ocml_atan2_f64(__x, __y); -} +double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); } + __DEVICE__ -inline double atanh(double __x) { return __ocml_atanh_f64(__x); } +double atanh(double __x) { return __ocml_atanh_f64(__x); } + __DEVICE__ -inline double cbrt(double __x) { return __ocml_cbrt_f64(__x); } +double cbrt(double __x) { return __ocml_cbrt_f64(__x); } + __DEVICE__ -inline double ceil(double __x) { return __ocml_ceil_f64(__x); } +double ceil(double __x) { return __ocml_ceil_f64(__x); } + __DEVICE__ -inline double copysign(double __x, double __y) { +double copysign(double __x, double __y) { return __ocml_copysign_f64(__x, __y); } + __DEVICE__ -inline double cos(double __x) { return __ocml_cos_f64(__x); } +double cos(double __x) { return __ocml_cos_f64(__x); } + __DEVICE__ -inline double cosh(double __x) { return __ocml_cosh_f64(__x); } +double cosh(double __x) { return __ocml_cosh_f64(__x); } + __DEVICE__ -inline double cospi(double __x) { return __ocml_cospi_f64(__x); } +double cospi(double __x) { return __ocml_cospi_f64(__x); } + __DEVICE__ -inline double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } +double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } + __DEVICE__ -inline double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } +double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } + __DEVICE__ -inline double erf(double __x) { return __ocml_erf_f64(__x); } +double erf(double __x) { return __ocml_erf_f64(__x); } + __DEVICE__ -inline double erfc(double __x) { return __ocml_erfc_f64(__x); } +double erfc(double __x) { return __ocml_erfc_f64(__x); } + __DEVICE__ -inline double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } +double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } + __DEVICE__ -inline double erfcx(double __x) { return __ocml_erfcx_f64(__x); } +double erfcx(double __x) { return __ocml_erfcx_f64(__x); } + __DEVICE__ -inline double erfinv(double __x) { return __ocml_erfinv_f64(__x); } +double erfinv(double __x) { return __ocml_erfinv_f64(__x); } + __DEVICE__ -inline double exp(double __x) { return 
__ocml_exp_f64(__x); } +double exp(double __x) { return __ocml_exp_f64(__x); } + __DEVICE__ -inline double exp10(double __x) { return __ocml_exp10_f64(__x); } +double exp10(double __x) { return __ocml_exp10_f64(__x); } + __DEVICE__ -inline double exp2(double __x) { return __ocml_exp2_f64(__x); } +double exp2(double __x) { return __ocml_exp2_f64(__x); } + __DEVICE__ -inline double expm1(double __x) { return __ocml_expm1_f64(__x); } +double expm1(double __x) { return __ocml_expm1_f64(__x); } + __DEVICE__ -inline double fabs(double __x) { return __ocml_fabs_f64(__x); } +double fabs(double __x) { return __ocml_fabs_f64(__x); } + __DEVICE__ -inline double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } +double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } + __DEVICE__ -inline double floor(double __x) { return __ocml_floor_f64(__x); } +double floor(double __x) { return __ocml_floor_f64(__x); } + __DEVICE__ -inline double fma(double __x, double __y, double __z) { +double fma(double __x, double __y, double __z) { return __ocml_fma_f64(__x, __y, __z); } + __DEVICE__ -inline double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); } +double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); } + __DEVICE__ -inline double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); } +double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); } + __DEVICE__ -inline double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } +double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } + __DEVICE__ -inline double frexp(double __x, int *__nptr) { +double frexp(double __x, int *__nptr) { int __tmp; double __r = __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp); *__nptr = __tmp; - return __r; } + __DEVICE__ -inline double hypot(double __x, double __y) { - return __ocml_hypot_f64(__x, __y); -} +double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); } + __DEVICE__ -inline int ilogb(double __x) { return __ocml_ilogb_f64(__x); } +int ilogb(double __x) { return __ocml_ilogb_f64(__x); } + __DEVICE__ -inline __RETURN_TYPE isfinite(double __x) { return __ocml_isfinite_f64(__x); } +__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); } + __DEVICE__ -inline __RETURN_TYPE isinf(double __x) { return __ocml_isinf_f64(__x); } +__RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); } + __DEVICE__ -inline __RETURN_TYPE isnan(double __x) { return __ocml_isnan_f64(__x); } +__RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); } + __DEVICE__ -inline double j0(double __x) { return __ocml_j0_f64(__x); } +double j0(double __x) { return __ocml_j0_f64(__x); } + __DEVICE__ -inline double j1(double __x) { return __ocml_j1_f64(__x); } +double j1(double __x) { return __ocml_j1_f64(__x); } + __DEVICE__ -inline double jn(int __n, - double __x) { // TODO: we could use Ahmes multiplication - // and the Miller & Brown algorithm +double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication + // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. 
 if (__n == 0)
-    return j0f(__x);
+    return j0(__x);
   if (__n == 1)
-    return j1f(__x);
+    return j1(__x);
 
-  double __x0 = j0f(__x);
-  double __x1 = j1f(__x);
+  double __x0 = j0(__x);
+  double __x1 = j1(__x);
   for (int __i = 1; __i < __n; ++__i) {
     double __x2 = (2 * __i) / __x * __x1 - __x0;
     __x0 = __x1;
     __x1 = __x2;
   }
-
   return __x1;
 }
+
 __DEVICE__
-inline double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
+double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
+
 __DEVICE__
-inline double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
+double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
+
 __DEVICE__
-inline long long int llrint(double __x) { return __ocml_rint_f64(__x); }
+long long int llrint(double __x) { return __ocml_rint_f64(__x); }
+
 __DEVICE__
-inline long long int llround(double __x) { return __ocml_round_f64(__x); }
+long long int llround(double __x) { return __ocml_round_f64(__x); }
+
 __DEVICE__
-inline double log(double __x) { return __ocml_log_f64(__x); }
+double log(double __x) { return __ocml_log_f64(__x); }
+
 __DEVICE__
-inline double log10(double __x) { return __ocml_log10_f64(__x); }
+double log10(double __x) { return __ocml_log10_f64(__x); }
+
 __DEVICE__
-inline double log1p(double __x) { return __ocml_log1p_f64(__x); }
+double log1p(double __x) { return __ocml_log1p_f64(__x); }
+
 __DEVICE__
-inline double log2(double __x) { return __ocml_log2_f64(__x); }
+double log2(double __x) { return __ocml_log2_f64(__x); }
+
 __DEVICE__
-inline double logb(double __x) { return __ocml_logb_f64(__x); }
+double logb(double __x) { return __ocml_logb_f64(__x); }
+
 __DEVICE__
-inline long int lrint(double __x) { return __ocml_rint_f64(__x); }
+long int lrint(double __x) { return __ocml_rint_f64(__x); }
+
 __DEVICE__
-inline long int lround(double __x) { return __ocml_round_f64(__x); }
+long int lround(double __x) { return __ocml_round_f64(__x); }
+
 __DEVICE__
-inline double modf(double __x, double *__iptr) {
+double modf(double __x, double *__iptr) {
   double __tmp;
   double __r =
       __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
@@ -733,8 +863,9 @@ inline double modf(double __x, double *__iptr) {
   return __r;
 }
 
+
 __DEVICE__
-inline double nan(const char *__tagp) {
+double nan(const char *__tagp) {
 #if !_WIN32
   union {
     double val;
@@ -755,21 +886,23 @@ inline double nan(const char *__tagp) {
   return __tmp.val;
 #else
   __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
-  uint64_t val = __make_mantissa(__tagp);
-  val |= 0xFFF << 51;
-  return *reinterpret_cast<double *>(&val);
+  uint64_t __val = __make_mantissa(__tagp);
+  __val |= 0xFFF << 51;
+  return *reinterpret_cast<double *>(&__val);
 #endif
 }
 
+
 __DEVICE__
-inline double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
+double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
+
 __DEVICE__
-inline double nextafter(double __x, double __y) {
+double nextafter(double __x, double __y) {
   return __ocml_nextafter_f64(__x, __y);
 }
+
 __DEVICE__
-inline double
-norm(int __dim,
-     const double *__a) { // TODO: placeholder until OCML adds support.
+double norm(int __dim,
+            const double *__a) { // TODO: placeholder until OCML adds support.
double __r = 0; while (__dim--) { __r += __a[0] * __a[0]; @@ -778,30 +911,39 @@ norm(int __dim, return __ocml_sqrt_f64(__r); } + __DEVICE__ -inline double norm3d(double __x, double __y, double __z) { +double norm3d(double __x, double __y, double __z) { return __ocml_len3_f64(__x, __y, __z); } + __DEVICE__ -inline double norm4d(double __x, double __y, double __z, double __w) { +double norm4d(double __x, double __y, double __z, double __w) { return __ocml_len4_f64(__x, __y, __z, __w); } + __DEVICE__ -inline double normcdf(double __x) { return __ocml_ncdf_f64(__x); } +double normcdf(double __x) { return __ocml_ncdf_f64(__x); } + __DEVICE__ -inline double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } +double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } + __DEVICE__ -inline double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } +double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } + __DEVICE__ -inline double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); } +double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); } + __DEVICE__ -inline double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } +double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } + __DEVICE__ -inline double remainder(double __x, double __y) { +double remainder(double __x, double __y) { return __ocml_remainder_f64(__x, __y); } + __DEVICE__ -inline double remquo(double __x, double __y, int *__quo) { +double remquo(double __x, double __y, int *__quo) { int __tmp; double __r = __ocml_remquo_f64( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); @@ -809,16 +951,16 @@ inline double remquo(double __x, double __y, int *__quo) { return __r; } + __DEVICE__ -inline double rhypot(double __x, double __y) { - return __ocml_rhypot_f64(__x, __y); -} +double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); } + __DEVICE__ -inline double rint(double __x) { return __ocml_rint_f64(__x); } +double rint(double __x) { return __ocml_rint_f64(__x); } + __DEVICE__ -inline double -rnorm(int __dim, - const double *__a) { // TODO: placeholder until OCML adds support. +double rnorm(int __dim, + const double *__a) { // TODO: placeholder until OCML adds support. double __r = 0; while (__dim--) { __r += __a[0] * __a[0]; @@ -827,77 +969,93 @@ rnorm(int __dim, return __ocml_rsqrt_f64(__r); } + __DEVICE__ -inline double rnorm3d(double __x, double __y, double __z) { +double rnorm3d(double __x, double __y, double __z) { return __ocml_rlen3_f64(__x, __y, __z); } + __DEVICE__ -inline double rnorm4d(double __x, double __y, double __z, double __w) { +double rnorm4d(double __x, double __y, double __z, double __w) { return __ocml_rlen4_f64(__x, __y, __z, __w); } + __DEVICE__ -inline double round(double __x) { return __ocml_round_f64(__x); } +double round(double __x) { return __ocml_round_f64(__x); } + __DEVICE__ -inline double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } +double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } + __DEVICE__ -inline double scalbln(double __x, long int __n) { +double scalbln(double __x, long int __n) { return (__n < INT_MAX) ? 
__ocml_scalbn_f64(__x, __n) : __ocml_scalb_f64(__x, __n); } __DEVICE__ -inline double scalbn(double __x, int __n) { - return __ocml_scalbn_f64(__x, __n); -} +double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); } + __DEVICE__ -inline __RETURN_TYPE signbit(double __x) { return __ocml_signbit_f64(__x); } +__RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); } + __DEVICE__ -inline double sin(double __x) { return __ocml_sin_f64(__x); } +double sin(double __x) { return __ocml_sin_f64(__x); } + __DEVICE__ -inline void sincos(double __x, double *__sinptr, double *__cosptr) { +void sincos(double __x, double *__sinptr, double *__cosptr) { double __tmp; *__sinptr = __ocml_sincos_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; } + __DEVICE__ -inline void sincospi(double __x, double *__sinptr, double *__cosptr) { +void sincospi(double __x, double *__sinptr, double *__cosptr) { double __tmp; *__sinptr = __ocml_sincospi_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; } + __DEVICE__ -inline double sinh(double __x) { return __ocml_sinh_f64(__x); } +double sinh(double __x) { return __ocml_sinh_f64(__x); } + __DEVICE__ -inline double sinpi(double __x) { return __ocml_sinpi_f64(__x); } +double sinpi(double __x) { return __ocml_sinpi_f64(__x); } + __DEVICE__ -inline double sqrt(double __x) { return __ocml_sqrt_f64(__x); } +double sqrt(double __x) { return __ocml_sqrt_f64(__x); } + __DEVICE__ -inline double tan(double __x) { return __ocml_tan_f64(__x); } +double tan(double __x) { return __ocml_tan_f64(__x); } + __DEVICE__ -inline double tanh(double __x) { return __ocml_tanh_f64(__x); } +double tanh(double __x) { return __ocml_tanh_f64(__x); } + __DEVICE__ -inline double tgamma(double __x) { return __ocml_tgamma_f64(__x); } +double tgamma(double __x) { return __ocml_tgamma_f64(__x); } + __DEVICE__ -inline double trunc(double __x) { return __ocml_trunc_f64(__x); } +double trunc(double __x) { return __ocml_trunc_f64(__x); } + __DEVICE__ -inline double y0(double __x) { return __ocml_y0_f64(__x); } +double y0(double __x) { return __ocml_y0_f64(__x); } + __DEVICE__ -inline double y1(double __x) { return __ocml_y1_f64(__x); } +double y1(double __x) { return __ocml_y1_f64(__x); } + __DEVICE__ -inline double yn(int __n, - double __x) { // TODO: we could use Ahmes multiplication - // and the Miller & Brown algorithm +double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication + // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. 
if (__n == 0) - return j0f(__x); + return y0(__x); if (__n == 1) - return j1f(__x); + return y1(__x); - double __x0 = j0f(__x); - double __x1 = j1f(__x); + double __x0 = y0(__x); + double __x1 = y1(__x); for (int __i = 1; __i < __n; ++__i) { double __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; @@ -910,297 +1068,168 @@ inline double yn(int __n, // BEGIN INTRINSICS #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __dadd_rd(double __x, double __y) { +double __dadd_rd(double __x, double __y) { return __ocml_add_rtn_f64(__x, __y); } #endif __DEVICE__ -inline double __dadd_rn(double __x, double __y) { return __x + __y; } +double __dadd_rn(double __x, double __y) { + return __ocml_add_rte_f64(__x, __y); +} #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __dadd_ru(double __x, double __y) { +double __dadd_ru(double __x, double __y) { return __ocml_add_rtp_f64(__x, __y); } + __DEVICE__ -inline double __dadd_rz(double __x, double __y) { +double __dadd_rz(double __x, double __y) { return __ocml_add_rtz_f64(__x, __y); } + __DEVICE__ -inline double __ddiv_rd(double __x, double __y) { +double __ddiv_rd(double __x, double __y) { return __ocml_div_rtn_f64(__x, __y); } #endif __DEVICE__ -inline double __ddiv_rn(double __x, double __y) { return __x / __y; } +double __ddiv_rn(double __x, double __y) { + return __ocml_div_rte_f64(__x, __y); +} #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __ddiv_ru(double __x, double __y) { +double __ddiv_ru(double __x, double __y) { return __ocml_div_rtp_f64(__x, __y); } + __DEVICE__ -inline double __ddiv_rz(double __x, double __y) { +double __ddiv_rz(double __x, double __y) { return __ocml_div_rtz_f64(__x, __y); } + __DEVICE__ -inline double __dmul_rd(double __x, double __y) { +double __dmul_rd(double __x, double __y) { return __ocml_mul_rtn_f64(__x, __y); } #endif __DEVICE__ -inline double __dmul_rn(double __x, double __y) { return __x * __y; } +double __dmul_rn(double __x, double __y) { + return __ocml_mul_rte_f64(__x, __y); +} #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __dmul_ru(double __x, double __y) { +double __dmul_ru(double __x, double __y) { return __ocml_mul_rtp_f64(__x, __y); } + __DEVICE__ -inline double __dmul_rz(double __x, double __y) { +double __dmul_rz(double __x, double __y) { return __ocml_mul_rtz_f64(__x, __y); } + __DEVICE__ -inline double __drcp_rd(double __x) { return __llvm_amdgcn_rcp_f64(__x); } +double __drcp_rd(double __x) { return __llvm_amdgcn_rcp_f64(__x); } #endif __DEVICE__ -inline double __drcp_rn(double __x) { return __llvm_amdgcn_rcp_f64(__x); } +double __drcp_rn(double __x) { return __llvm_amdgcn_rcp_f64(__x); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __drcp_ru(double __x) { return __llvm_amdgcn_rcp_f64(__x); } +double __drcp_ru(double __x) { return __llvm_amdgcn_rcp_f64(__x); } + __DEVICE__ -inline double __drcp_rz(double __x) { return __llvm_amdgcn_rcp_f64(__x); } +double __drcp_rz(double __x) { return __llvm_amdgcn_rcp_f64(__x); } + __DEVICE__ -inline double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } +double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } #endif __DEVICE__ -inline double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); } +double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); } +double __dsqrt_ru(double __x) { return 
__ocml_sqrt_rtp_f64(__x); }
+
 __DEVICE__
-inline double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
+double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
+
 __DEVICE__
-inline double __dsub_rd(double __x, double __y) {
+double __dsub_rd(double __x, double __y) {
   return __ocml_sub_rtn_f64(__x, __y);
 }
 #endif
 __DEVICE__
-inline double __dsub_rn(double __x, double __y) { return __x - __y; }
+double __dsub_rn(double __x, double __y) {
+  return __ocml_sub_rte_f64(__x, __y);
+}
 #if defined OCML_BASIC_ROUNDED_OPERATIONS
 __DEVICE__
-inline double __dsub_ru(double __x, double __y) {
+double __dsub_ru(double __x, double __y) {
   return __ocml_sub_rtp_f64(__x, __y);
 }
+
 __DEVICE__
-inline double __dsub_rz(double __x, double __y) {
+double __dsub_rz(double __x, double __y) {
   return __ocml_sub_rtz_f64(__x, __y);
 }
+
 __DEVICE__
-inline double __fma_rd(double __x, double __y, double __z) {
+double __fma_rd(double __x, double __y, double __z) {
   return __ocml_fma_rtn_f64(__x, __y, __z);
 }
 #endif
 __DEVICE__
-inline double __fma_rn(double __x, double __y, double __z) {
-  return __ocml_fma_f64(__x, __y, __z);
+double __fma_rn(double __x, double __y, double __z) {
+  return __ocml_fma_rte_f64(__x, __y, __z);
 }
 #if defined OCML_BASIC_ROUNDED_OPERATIONS
 __DEVICE__
-inline double __fma_ru(double __x, double __y, double __z) {
+double __fma_ru(double __x, double __y, double __z) {
   return __ocml_fma_rtp_f64(__x, __y, __z);
 }
+
 __DEVICE__
-inline double __fma_rz(double __x, double __y, double __z) {
+double __fma_rz(double __x, double __y, double __z) {
   return __ocml_fma_rtz_f64(__x, __y, __z);
 }
 #endif
 // END INTRINSICS
 // END DOUBLE
 
-// BEGIN INTEGER
-__DEVICE__
-inline int abs(int __x) {
-  int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
-  return (__x ^ __sgn) - __sgn;
-}
-__DEVICE__
-inline long labs(long __x) {
-  long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
-  return (__x ^ __sgn) - __sgn;
-}
-__DEVICE__
-inline long long llabs(long long __x) {
-  long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
-  return (__x ^ __sgn) - __sgn;
-}
+// C only macros
+#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
+#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
+#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
+#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
+#define signbit(__x)                                                           \
+  _Generic((__x), float : __signbitf, double : __signbit)(__x)
+#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
 
 #if defined(__cplusplus)
-__DEVICE__
-inline long abs(long __x) { return labs(__x); }
-__DEVICE__
-inline long long abs(long long __x) { return llabs(__x); }
-#endif
-// END INTEGER
-
-__DEVICE__
-inline _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) {
-  return __ocml_fma_f16(__x, __y, __z);
-}
-
-__DEVICE__
-inline float fma(float __x, float __y, float __z) {
-  return fmaf(__x, __y, __z);
-}
-
-#pragma push_macro("__DEF_FUN1")
-#pragma push_macro("__DEF_FUN2")
-#pragma push_macro("__DEF_FUNI")
-#pragma push_macro("__DEF_FLOAT_FUN2I")
-#pragma push_macro("__HIP_OVERLOAD1")
-#pragma push_macro("__HIP_OVERLOAD2")
-
-// __hip_enable_if::type is a type function which returns __T if __B is true.
-template <bool __B, class __T = void> struct __hip_enable_if {};
-
-template <class __T> struct __hip_enable_if<true, __T> { typedef __T type; };
-
-// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
-// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
-// floor(double).
-#define __HIP_OVERLOAD1(__retty, __fn) \ - template \ - __DEVICE__ typename __hip_enable_if::is_integer, \ - __retty>::type \ - __fn(__T __x) { \ - return ::__fn((double)__x); \ - } - -// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double -// or integer argument to avoid compilation error due to ambibuity. e.g. -// max(5.0f, 6.0) is resolved with max(double, double). -#define __HIP_OVERLOAD2(__retty, __fn) \ - template \ - __DEVICE__ \ - typename __hip_enable_if::is_specialized && \ - std::numeric_limits<__T2>::is_specialized, \ - __retty>::type \ - __fn(__T1 __x, __T2 __y) { \ - return __fn((double)__x, (double)__y); \ - } - -// Define cmath functions with float argument and returns float. -#define __DEF_FUN1(__retty, __func) \ - __DEVICE__ \ - inline float __func(float __x) { return __func##f(__x); } \ - __HIP_OVERLOAD1(__retty, __func) - -// Define cmath functions with float argument and returns __retty. -#define __DEF_FUNI(__retty, __func) \ - __DEVICE__ \ - inline __retty __func(float __x) { return __func##f(__x); } \ - __HIP_OVERLOAD1(__retty, __func) - -// define cmath functions with two float arguments. -#define __DEF_FUN2(__retty, __func) \ - __DEVICE__ \ - inline float __func(float __x, float __y) { return __func##f(__x, __y); } \ - __HIP_OVERLOAD2(__retty, __func) - -__DEF_FUN1(double, acos) -__DEF_FUN1(double, acosh) -__DEF_FUN1(double, asin) -__DEF_FUN1(double, asinh) -__DEF_FUN1(double, atan) -__DEF_FUN2(double, atan2); -__DEF_FUN1(double, atanh) -__DEF_FUN1(double, cbrt) -__DEF_FUN1(double, ceil) -__DEF_FUN2(double, copysign); -__DEF_FUN1(double, cos) -__DEF_FUN1(double, cosh) -__DEF_FUN1(double, erf) -__DEF_FUN1(double, erfc) -__DEF_FUN1(double, exp) -__DEF_FUN1(double, exp2) -__DEF_FUN1(double, expm1) -__DEF_FUN1(double, fabs) -__DEF_FUN2(double, fdim); -__DEF_FUN1(double, floor) -__DEF_FUN2(double, fmax); -__DEF_FUN2(double, fmin); -__DEF_FUN2(double, fmod); -//__HIP_OVERLOAD1(int, fpclassify) -__DEF_FUN2(double, hypot); -__DEF_FUNI(int, ilogb) -__HIP_OVERLOAD1(bool, isfinite) -__HIP_OVERLOAD2(bool, isgreater); -__HIP_OVERLOAD2(bool, isgreaterequal); -__HIP_OVERLOAD1(bool, isinf); -__HIP_OVERLOAD2(bool, isless); -__HIP_OVERLOAD2(bool, islessequal); -__HIP_OVERLOAD2(bool, islessgreater); -__HIP_OVERLOAD1(bool, isnan); -//__HIP_OVERLOAD1(bool, isnormal) -__HIP_OVERLOAD2(bool, isunordered); -__DEF_FUN1(double, lgamma) -__DEF_FUN1(double, log) -__DEF_FUN1(double, log10) -__DEF_FUN1(double, log1p) -__DEF_FUN1(double, log2) -__DEF_FUN1(double, logb) -__DEF_FUNI(long long, llrint) -__DEF_FUNI(long long, llround) -__DEF_FUNI(long, lrint) -__DEF_FUNI(long, lround) -__DEF_FUN1(double, nearbyint); -__DEF_FUN2(double, nextafter); -__DEF_FUN2(double, pow); -__DEF_FUN2(double, remainder); -__DEF_FUN1(double, rint); -__DEF_FUN1(double, round); -__HIP_OVERLOAD1(bool, signbit) -__DEF_FUN1(double, sin) -__DEF_FUN1(double, sinh) -__DEF_FUN1(double, sqrt) -__DEF_FUN1(double, tan) -__DEF_FUN1(double, tanh) -__DEF_FUN1(double, tgamma) -__DEF_FUN1(double, trunc); - -// define cmath functions with a float and an integer argument. -#define __DEF_FLOAT_FUN2I(__func) \ - __DEVICE__ \ - inline float __func(float __x, int __y) { return __func##f(__x, __y); } -__DEF_FLOAT_FUN2I(scalbn) -__DEF_FLOAT_FUN2I(ldexp) - -template __DEVICE__ inline T min(T __arg1, T __arg2) { +template __DEVICE__ T min(T __arg1, T __arg2) { return (__arg1 < __arg2) ? 
__arg1 : __arg2; } -template __DEVICE__ inline T max(T __arg1, T __arg2) { +template __DEVICE__ T max(T __arg1, T __arg2) { return (__arg1 > __arg2) ? __arg1 : __arg2; } -__DEVICE__ inline int min(int __arg1, int __arg2) { +__DEVICE__ int min(int __arg1, int __arg2) { return (__arg1 < __arg2) ? __arg1 : __arg2; } -__DEVICE__ inline int max(int __arg1, int __arg2) { +__DEVICE__ int max(int __arg1, int __arg2) { return (__arg1 > __arg2) ? __arg1 : __arg2; } __DEVICE__ -inline float max(float __x, float __y) { return fmaxf(__x, __y); } +float max(float __x, float __y) { return fmaxf(__x, __y); } __DEVICE__ -inline double max(double __x, double __y) { return fmax(__x, __y); } +double max(double __x, double __y) { return fmax(__x, __y); } __DEVICE__ -inline float min(float __x, float __y) { return fminf(__x, __y); } +float min(float __x, float __y) { return fminf(__x, __y); } __DEVICE__ -inline double min(double __x, double __y) { return fmin(__x, __y); } - -__HIP_OVERLOAD2(double, max) -__HIP_OVERLOAD2(double, min) +double min(double __x, double __y) { return fmin(__x, __y); } __host__ inline static int min(int __arg1, int __arg2) { return std::min(__arg1, __arg2); @@ -1209,47 +1238,8 @@ __host__ inline static int min(int __arg1, int __arg2) { __host__ inline static int max(int __arg1, int __arg2) { return std::max(__arg1, __arg2); } - -#ifdef __cplusplus -__DEVICE__ -inline float pow(float __base, int __iexp) { return powif(__base, __iexp); } - -__DEVICE__ -inline double pow(double __base, int __iexp) { return powi(__base, __iexp); } - -__DEVICE__ -inline _Float16 pow(_Float16 __base, int __iexp) { - return __ocml_pown_f16(__base, __iexp); -} - -__DEVICE__ -inline float remquo(float __x, float __y, int *__quo) { - return remquof(__x, __y, __quo); -} - -template -__DEVICE__ - typename __hip_enable_if::is_specialized && - std::numeric_limits<__T2>::is_specialized, - double>::type - remquo(__T1 __x, __T2 __y, int *__quo) { - return remquo((double)__x, (double)__y, __quo); -} - -__DEVICE__ -inline float frexp(float __x, int *__nptr) { return frexpf(__x, __nptr); } - -__DEVICE__ -inline float modf(float __x, float *__iptr) { return modff(__x, __iptr); } - #endif -#pragma pop_macro("__DEF_FUN1") -#pragma pop_macro("__DEF_FUN2") -#pragma pop_macro("__DEF_FUNI") -#pragma pop_macro("__DEF_FLOAT_FUN2I") -#pragma pop_macro("__HIP_OVERLOAD1") -#pragma pop_macro("__HIP_OVERLOAD2") #pragma pop_macro("__DEVICE__") #pragma pop_macro("__RETURN_TYPE") diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h index 007b5f12ffbdc..81a16a265ae8e 100644 --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -55,6 +55,7 @@ static inline __device__ void *free(void *__ptr) { #if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ #include <__clang_cuda_math_forward_declares.h> +#include <__clang_hip_cmath.h> #include <__clang_cuda_complex_builtins.h> #include From 260892dff06666957f727310ca78d79789c3a04f Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 6 Oct 2020 14:49:44 +0000 Subject: [PATCH 151/321] [gn build] Port aa2b593f149 --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 58fe63d10294c..111915e0ed563 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -73,6 +73,7 
@@ copy("Headers") { "__clang_cuda_math.h", "__clang_cuda_math_forward_declares.h", "__clang_cuda_runtime_wrapper.h", + "__clang_hip_cmath.h", "__clang_hip_libdevice_declares.h", "__clang_hip_math.h", "__clang_hip_runtime_wrapper.h", From 86429c4eaf22b68d31428a708e6579faf9e4f61a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Mon, 5 Oct 2020 22:16:59 +0200 Subject: [PATCH 152/321] [SimplifyLibCalls] Optimize mempcpy_chk to mempcpy --- .../llvm/Analysis/TargetLibraryInfo.def | 3 +++ .../llvm/Transforms/Utils/BuildLibCalls.h | 4 ++++ .../llvm/Transforms/Utils/SimplifyLibCalls.h | 1 + llvm/lib/Analysis/TargetLibraryInfo.cpp | 1 + llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 9 ++++++++ .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 15 ++++++++++++ .../Transforms/InstCombine/fortify-folding.ll | 23 +++++++++++++++++++ .../Analysis/TargetLibraryInfoTest.cpp | 1 + 8 files changed, 57 insertions(+) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 36b39f4a0e231..7501d1a304e08 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -366,6 +366,9 @@ TLI_DEFINE_STRING_INTERNAL("__memcpy_chk") /// void *__memmove_chk(void *s1, const void *s2, size_t n, size_t s1size); TLI_DEFINE_ENUM_INTERNAL(memmove_chk) TLI_DEFINE_STRING_INTERNAL("__memmove_chk") +/// void *__mempcpy_chk(void *s1, const void *s2, size_t n, size_t s1size); +TLI_DEFINE_ENUM_INTERNAL(mempcpy_chk) +TLI_DEFINE_STRING_INTERNAL("__mempcpy_chk") /// void *__memset_chk(void *s, char v, size_t n, size_t s1size); TLI_DEFINE_ENUM_INTERNAL(memset_chk) TLI_DEFINE_STRING_INTERNAL("__memset_chk") diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h index 90517e806e02c..e7d41933a6c9f 100644 --- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -96,6 +96,10 @@ namespace llvm { IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI); + /// Emit a call to the mempcpy function. + Value *emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI); + /// Emit a call to the memchr function. This assumes that Ptr is a pointer, /// Val is an i32 value, and Len is an 'intptr_t' value. 
Value *emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 2819a3468766d..8703434e16967 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -59,6 +59,7 @@ class FortifiedLibCallSimplifier { Value *optimizeStrpCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func); Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func); Value *optimizeStrLenChk(CallInst *CI, IRBuilderBase &B); + Value *optimizeMemPCpyChk(CallInst *CI, IRBuilderBase &B); Value *optimizeMemCCpyChk(CallInst *CI, IRBuilderBase &B); Value *optimizeSNPrintfChk(CallInst *CI, IRBuilderBase &B); Value *optimizeSPrintfChk(CallInst *CI,IRBuilderBase &B); diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index e629d04e5deec..d9b263b3967c4 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -847,6 +847,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy()); case LibFunc_memcpy_chk: + case LibFunc_mempcpy_chk: case LibFunc_memmove_chk: --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 2a0cdf6176109..86e9b48826e3d 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1076,6 +1076,15 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, return CI; } +Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + LLVMContext &Context = B.GetInsertBlock()->getContext(); + return emitLibCall( + LibFunc_mempcpy, B.getInt8PtrTy(), + {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)}, + {Dst, Src, Len}, B, TLI); +} + Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { LLVMContext &Context = B.GetInsertBlock()->getContext(); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index bcda3f3440a34..a904d2550562e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -3292,6 +3292,19 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, return nullptr; } +Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI, + IRBuilderBase &B) { + const DataLayout &DL = CI->getModule()->getDataLayout(); + if (isFortifiedCallFoldable(CI, 3, 2)) + if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, DL, TLI)) { + CallInst *NewCI = cast(Call); + NewCI->setAttributes(CI->getAttributes()); + return NewCI; + } + return nullptr; +} + Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func) { @@ -3481,6 +3494,8 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI, switch (Func) { case LibFunc_memcpy_chk: return optimizeMemCpyChk(CI, Builder); + case LibFunc_mempcpy_chk: + return optimizeMemPCpyChk(CI, Builder); case LibFunc_memmove_chk: return optimizeMemMoveChk(CI, Builder); case LibFunc_memset_chk: diff --git 
a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll index 2602640595e65..ea29ecc31d114 100644 --- a/llvm/test/Transforms/InstCombine/fortify-folding.ll +++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll @@ -31,6 +31,28 @@ define i8* @test_not_memccpy() { ret i8* %ret } +define i8* @test_mempcpy() { +; CHECK-LABEL: @test_mempcpy( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 15) +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 15, i64 -1) + ret i8* %ret +} + +define i8* @test_not_mempcpy() { +; CHECK-LABEL: @test_not_mempcpy( +; CHECK-NEXT: [[RET:%.*]] = call i8* @__mempcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 60, i64 59) +; CHECK-NEXT: ret i8* [[RET]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 60, i64 59) + ret i8* %ret +} + define i32 @test_snprintf() { ; CHECK-LABEL: @test_snprintf( ; CHECK-NEXT: [[SNPRINTF:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) @@ -219,6 +241,7 @@ define i32 @test_not_vsprintf() { ret i32 %ret } +declare i8* @__mempcpy_chk(i8*, i8*, i64, i64) declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64) declare i32 @__snprintf_chk(i8*, i64, i32, i64, i8*, ...) declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...) diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index db75192640def..6e57a1b3e8e78 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -481,6 +481,7 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare i8* @__strcpy_chk(i8*, i8*, i64)\n" "declare i8* @__strncpy_chk(i8*, i8*, i64, i64)\n" "declare i8* @__memccpy_chk(i8*, i8*, i32, i64)\n" + "declare i8* @__mempcpy_chk(i8*, i8*, i64, i64)\n" "declare i32 @__snprintf_chk(i8*, i64, i32, i64, i8*, ...)\n" "declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)\n" "declare i8* @__strcat_chk(i8*, i8*, i64)\n" From 42093562a711fec56d561fe7c1800f119ae949bf Mon Sep 17 00:00:00 2001 From: Aaron En Ye Shi Date: Tue, 6 Oct 2020 15:19:00 +0000 Subject: [PATCH 153/321] [HIP] NFC properly reference Differential Revision The earlier commit "[HIP] Restructure hip headers to add cmath" landed with a typo in its commit message: it said "Differential Review" where it should have said "Differential Revision". This commit references the correctly spelled tag so that the corresponding diff can be closed. Differential Revision: https://reviews.llvm.org/D88837 From 8d2a0c115e245e86bba4ea1c70e6d34b552031a9 Mon Sep 17 00:00:00 2001 From: Aaron En Ye Shi Date: Tue, 6 Oct 2020 15:26:56 +0000 Subject: [PATCH 154/321] [HIP] NFC Add comments to cmath functions Add missing comments to cmath functions.
Differential Revision: https://reviews.llvm.org/D88837 --- clang/lib/Headers/__clang_hip_cmath.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h index 067c7e6c9d1ba..fea799ead32f7 100644 --- a/clang/lib/Headers/__clang_hip_cmath.h +++ b/clang/lib/Headers/__clang_hip_cmath.h @@ -370,7 +370,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif // Pull the new overloads we defined above into namespace std. -// using ::abs; +// using ::abs; - This may be considered for C++. using ::acos; using ::acosh; using ::asin; @@ -419,8 +419,8 @@ using ::logb; using ::lrint; using ::lround; using ::modf; -// using ::nan; -// using ::nanf; +// using ::nan; - This may be considered for C++. +// using ::nanf; - This may be considered for C++. // using ::nanl; - This is not yet defined. using ::nearbyint; using ::nextafter; From 43c7dc52f12973b306910a161bcf150d70d33504 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 6 Oct 2020 08:26:12 -0700 Subject: [PATCH 155/321] [X86] .code16: temporarily set Mode32Bit when matching an instruction with the data32 prefix PR47632 This allows MC to match `data32 ...` as one instruction instead of two (data32 without insn + insn). The compatibility with GNU as improves: `data32 ljmp` will be matched as ljmpl. `data32 lgdt 4(%eax)` will be matched as `lgdtl` (prefixes: 0x67 0x66, instead of 0x66 0x67). GNU as supports many other `data32 *w` as `*l`. We currently just hard code `data32 callw` and `data32 ljmpw`. Generalizing the suffix replacement is tricky and requires a think about the "bwlq" appending suffix rules in MatchAndEmitATTInstruction. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D88772 --- .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 28 ++++++++++++++----- llvm/test/MC/X86/data-prefix-fail.s | 6 ++-- llvm/test/MC/X86/x86-16.s | 11 +++++--- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 1f594c54c4107..8af1148df7a2c 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -78,6 +78,7 @@ static const char OpPrecedence[] = { class X86AsmParser : public MCTargetAsmParser { ParseInstructionInfo *InstInfo; bool Code16GCC; + unsigned ForcedDataPrefix = 0; enum VEXEncoding { VEXEncoding_Default, @@ -3085,13 +3086,18 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (getLexer().isNot(AsmToken::EndOfStatement)) { StringRef Next = Parser.getTok().getString(); - // Parse data32 call as calll. - if (Next == "call" || Next == "callw") { - getLexer().Lex(); - Name = "calll"; - PatchedName = Name; - isPrefix = false; - } + getLexer().Lex(); + // data32 effectively changes the instruction suffix. + // TODO Generalize. + if (Next == "callw") + Next = "calll"; + if (Next == "ljmpw") + Next = "ljmpl"; + + Name = Next; + PatchedName = Name; + ForcedDataPrefix = X86::Mode32Bit; + isPrefix = false; } } @@ -3779,11 +3785,19 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, if (Prefixes) Inst.setFlags(Prefixes); + // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode + // when matching the instruction. + if (ForcedDataPrefix == X86::Mode32Bit) + SwitchMode(X86::Mode32Bit); // First, try a direct match. 
FeatureBitset MissingFeatures; unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm, isParsingIntelSyntax()); + if (ForcedDataPrefix == X86::Mode32Bit) { + SwitchMode(X86::Mode16Bit); + ForcedDataPrefix = 0; + } switch (OriginalError) { default: llvm_unreachable("Unexpected match result!"); case Match_Success: diff --git a/llvm/test/MC/X86/data-prefix-fail.s b/llvm/test/MC/X86/data-prefix-fail.s index 638e972ad478f..bd5b62ddc9bed 100644 --- a/llvm/test/MC/X86/data-prefix-fail.s +++ b/llvm/test/MC/X86/data-prefix-fail.s @@ -7,10 +7,8 @@ // ERR64: error: 'data32' is not supported in 64-bit mode // ERR32: error: redundant data32 prefix -// 16: data32 -// 16: encoding: [0x66] -// 16: lgdtw 0 -// 16: encoding: [0x0f,0x01,0x16,0x00,0x00] +// 16: lgdtl 0 +// 16-SAME: encoding: [0x66,0x0f,0x01,0x16,0x00,0x00] data32 lgdt 0 // 64: data16 diff --git a/llvm/test/MC/X86/x86-16.s b/llvm/test/MC/X86/x86-16.s index 277d8a0103581..9f8c639726f9a 100644 --- a/llvm/test/MC/X86/x86-16.s +++ b/llvm/test/MC/X86/x86-16.s @@ -553,6 +553,11 @@ ljmp $0x7ace,$0x7ace data32 call a data32 callw a +// CHECK: ljmpl $1, $2 +// CHECK-NEXT: ljmpl $1, $2 +data32 ljmp $1, $2 +data32 ljmpw $1, $2 + // CHECK: incb %al # encoding: [0xfe,0xc0] incb %al @@ -972,10 +977,8 @@ lretl // CHECK: encoding: [0x66] data32 -// CHECK: data32 -// CHECK: encoding: [0x66] -// CHECK: lgdtw 4(%eax) -// CHECK: encoding: [0x67,0x0f,0x01,0x50,0x04] +// CHECK: lgdtl 4(%eax) +// CHECK-SAME: encoding: [0x67,0x66,0x0f,0x01,0x50,0x04] data32 lgdt 4(%eax) // CHECK: wbnoinvd From 281de8f3613683f805bfa6a202b4d740edf83d27 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 6 Oct 2020 11:27:57 -0400 Subject: [PATCH 156/321] [libc++] Allow retries in two flaky tests --- .../try_lock_shared_until.pass.cpp | 2 ++ .../thread.timedmutex.class/lock.pass.cpp | 1 + 2 files changed, 3 insertions(+) diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp index ea54031bb9739..a9513c83cf1f0 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp @@ -9,6 +9,8 @@ // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: c++03, c++11 +// ALLOW_RETRIES: 2 + // shared_timed_mutex was introduced in macosx10.12 // UNSUPPORTED: with_system_cxx_lib=macosx10.11 // UNSUPPORTED: with_system_cxx_lib=macosx10.10 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp index 1de51ec22a56f..782bf057ebb9c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp @@ -7,6 +7,7 @@ 
//===----------------------------------------------------------------------===// // UNSUPPORTED: libcpp-has-no-threads +// ALLOW_RETRIES: 2 // From 3cb8347c94a0d8ae1295fa4ae686443f49bc18e8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 6 Oct 2020 16:30:09 +0100 Subject: [PATCH 157/321] [APIntTest] Extend extractBits to check 'lshr+trunc' pattern for each case as well. Noticed while triaging PR47731 that we don't have great coverage for such patterns. --- llvm/unittests/ADT/APIntTest.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp index 4b8e8c7206523..673a2110af099 100644 --- a/llvm/unittests/ADT/APIntTest.cpp +++ b/llvm/unittests/ADT/APIntTest.cpp @@ -1994,23 +1994,44 @@ TEST(APIntTest, extractBits) { APInt i32(32, 0x1234567); EXPECT_EQ(0x3456, i32.extractBits(16, 4)); + APInt i64(64, 0x01234567FFFFFFFFull); + EXPECT_EQ(0xFFFFFFFF, i64.extractBits(32, 0)); + EXPECT_EQ(0xFFFFFFFF, i64.trunc(32)); + EXPECT_EQ(0x01234567, i64.extractBits(32, 32)); + EXPECT_EQ(0x01234567, i64.lshr(32).trunc(32)); + APInt i257(257, 0xFFFFFFFFFF0000FFull, true); EXPECT_EQ(0xFFu, i257.extractBits(16, 0)); + EXPECT_EQ(0xFFu, i257.lshr(0).trunc(16)); EXPECT_EQ((0xFFu >> 1), i257.extractBits(16, 1)); + EXPECT_EQ((0xFFu >> 1), i257.lshr(1).trunc(16)); EXPECT_EQ(-1, i257.extractBits(32, 64).getSExtValue()); + EXPECT_EQ(-1, i257.lshr(64).trunc(32).getSExtValue()); EXPECT_EQ(-1, i257.extractBits(128, 128).getSExtValue()); + EXPECT_EQ(-1, i257.lshr(128).trunc(128).getSExtValue()); EXPECT_EQ(-1, i257.extractBits(66, 191).getSExtValue()); + EXPECT_EQ(-1, i257.lshr(191).trunc(66).getSExtValue()); EXPECT_EQ(static_cast(0xFFFFFFFFFF80007Full), i257.extractBits(128, 1).getSExtValue()); + EXPECT_EQ(static_cast(0xFFFFFFFFFF80007Full), + i257.lshr(1).trunc(128).getSExtValue()); EXPECT_EQ(static_cast(0xFFFFFFFFFF80007Full), i257.extractBits(129, 1).getSExtValue()); + EXPECT_EQ(static_cast(0xFFFFFFFFFF80007Full), + i257.lshr(1).trunc(129).getSExtValue()); EXPECT_EQ(APInt(48, 0), APInt(144, "281474976710655", 10).extractBits(48, 48)); + EXPECT_EQ(APInt(48, 0), + APInt(144, "281474976710655", 10).lshr(48).trunc(48)); EXPECT_EQ(APInt(48, 0x0000ffffffffffffull), APInt(144, "281474976710655", 10).extractBits(48, 0)); + EXPECT_EQ(APInt(48, 0x0000ffffffffffffull), + APInt(144, "281474976710655", 10).lshr(0).trunc(48)); EXPECT_EQ(APInt(48, 0x00007fffffffffffull), APInt(144, "281474976710655", 10).extractBits(48, 1)); + EXPECT_EQ(APInt(48, 0x00007fffffffffffull), + APInt(144, "281474976710655", 10).lshr(1).trunc(48)); } TEST(APIntTest, extractBitsAsZExtValue) { From 53bf28b80cf9fec53c807922b19e0af2832dfeba Mon Sep 17 00:00:00 2001 From: peter klausler Date: Thu, 1 Oct 2020 11:46:24 -0700 Subject: [PATCH 158/321] [flang] Track CHARACTER length better in TypeAndShape CHARACTER length expressions were not always being captured or computed as part of procedure "characteristics", leading to test failures due to an inability to compute memory size expressions accurately. 
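To make the consequence concrete, the essence of the MeasureSizeInBytes() logic this change introduces is sketched below (condensed from the patch, not verbatim; the free-function framing is mine):

  // For CHARACTER the byte size is LEN * kind (kind being the bytes per
  // character unit), so an untracked LEN makes the size expression
  // unknowable; every other category is fully described by its DynamicType.
  std::optional<Expr<SubscriptInteger>> SizeInBytes(
      const DynamicType &type,
      const std::optional<Expr<SubscriptInteger>> &len) {
    if (type.category() == TypeCategory::Character && len) {
      return common::Clone(*len) * Expr<SubscriptInteger>{type.kind()};
    }
    return type.MeasureSizeInBytes();
  }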
Differential revision: https://reviews.llvm.org/D88689 --- .../include/flang/Evaluate/characteristics.h | 18 ++++---- flang/lib/Evaluate/characteristics.cpp | 43 ++++++++++++++++--- flang/lib/Evaluate/shape.cpp | 4 +- flang/lib/Semantics/check-call.cpp | 28 ++++++------ 4 files changed, 63 insertions(+), 30 deletions(-) diff --git a/flang/include/flang/Evaluate/characteristics.h b/flang/include/flang/Evaluate/characteristics.h index bde734cd510dc..5d3058694cf97 100644 --- a/flang/include/flang/Evaluate/characteristics.h +++ b/flang/include/flang/Evaluate/characteristics.h @@ -83,10 +83,6 @@ class TypeAndShape { const semantics::Symbol &, FoldingContext &); static std::optional Characterize( const semantics::ObjectEntityDetails &); - static std::optional Characterize( - const semantics::AssocEntityDetails &, FoldingContext &); - static std::optional Characterize( - const semantics::ProcEntityDetails &); static std::optional Characterize( const semantics::ProcInterface &); static std::optional Characterize( @@ -108,7 +104,7 @@ class TypeAndShape { if (type->category() == TypeCategory::Character) { if (const auto *chExpr{UnwrapExpr>(x)}) { if (auto length{chExpr->LEN()}) { - result.set_LEN(Expr{std::move(*length)}); + result.set_LEN(Fold(context, std::move(*length))); } } } @@ -141,8 +137,8 @@ class TypeAndShape { type_ = t; return *this; } - const std::optional> &LEN() const { return LEN_; } - TypeAndShape &set_LEN(Expr &&len) { + const std::optional> &LEN() const { return LEN_; } + TypeAndShape &set_LEN(Expr &&len) { LEN_ = std::move(len); return *this; } @@ -154,16 +150,22 @@ class TypeAndShape { bool IsCompatibleWith(parser::ContextualMessages &, const TypeAndShape &that, const char *thisIs = "POINTER", const char *thatIs = "TARGET", bool isElemental = false) const; + std::optional> MeasureSizeInBytes( + FoldingContext * = nullptr) const; llvm::raw_ostream &Dump(llvm::raw_ostream &) const; private: + static std::optional Characterize( + const semantics::AssocEntityDetails &, FoldingContext &); + static std::optional Characterize( + const semantics::ProcEntityDetails &); void AcquireShape(const semantics::ObjectEntityDetails &); void AcquireLEN(); protected: DynamicType type_; - std::optional> LEN_; + std::optional> LEN_; Shape shape_; Attrs attrs_; int corank_{0}; diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp index a28f4dd004cc1..3206f0a252080 100644 --- a/flang/lib/Evaluate/characteristics.cpp +++ b/flang/lib/Evaluate/characteristics.cpp @@ -65,7 +65,14 @@ std::optional TypeAndShape::Characterize( return std::visit( common::visitors{ [&](const semantics::ObjectEntityDetails &object) { - return Characterize(object); + auto result{Characterize(object)}; + if (result && + result->type().category() == TypeCategory::Character) { + if (auto len{DataRef{symbol}.LEN()}) { + result->set_LEN(Fold(context, std::move(*len))); + } + } + return result; }, [&](const semantics::ProcEntityDetails &proc) { const semantics::ProcInterface &interface{proc.interface()}; @@ -106,7 +113,15 @@ std::optional TypeAndShape::Characterize( const semantics::AssocEntityDetails &assoc, FoldingContext &context) { if (auto type{DynamicType::From(assoc.type())}) { if (auto shape{GetShape(context, assoc.expr())}) { - return TypeAndShape{std::move(*type), std::move(*shape)}; + TypeAndShape result{std::move(*type), std::move(*shape)}; + if (type->category() == TypeCategory::Character) { + if (const auto *chExpr{UnwrapExpr>(assoc.expr())}) { + if (auto len{chExpr->LEN()}) { + 
result.set_LEN(Fold(context, std::move(*len))); + } + } + } + return std::move(result); } } return std::nullopt; @@ -129,18 +144,32 @@ std::optional TypeAndShape::Characterize( bool TypeAndShape::IsCompatibleWith(parser::ContextualMessages &messages, const TypeAndShape &that, const char *thisIs, const char *thatIs, bool isElemental) const { - const auto &len{that.LEN()}; if (!type_.IsTkCompatibleWith(that.type_)) { + const auto &len{that.LEN()}; messages.Say( "%1$s type '%2$s' is not compatible with %3$s type '%4$s'"_err_en_US, thatIs, that.type_.AsFortran(len ? len->AsFortran() : ""), thisIs, - type_.AsFortran()); + type_.AsFortran(LEN_ ? LEN_->AsFortran() : "")); return false; } return isElemental || CheckConformance(messages, shape_, that.shape_, thisIs, thatIs); } +std::optional> TypeAndShape::MeasureSizeInBytes( + FoldingContext *foldingContext) const { + if (type_.category() == TypeCategory::Character && LEN_) { + Expr result{ + common::Clone(*LEN_) * Expr{type_.kind()}}; + if (foldingContext) { + result = Fold(*foldingContext, std::move(result)); + } + return result; + } else { + return type_.MeasureSizeInBytes(foldingContext); + } +} + void TypeAndShape::AcquireShape(const semantics::ObjectEntityDetails &object) { CHECK(shape_.empty() && !attrs_.test(Attr::AssumedRank)); corank_ = object.coshape().Rank(); @@ -178,7 +207,7 @@ void TypeAndShape::AcquireLEN() { if (type_.category() == TypeCategory::Character) { if (const auto *param{type_.charLength()}) { if (const auto &intExpr{param->GetExplicit()}) { - LEN_ = *intExpr; + LEN_ = ConvertToType(common::Clone(*intExpr)); } } } @@ -445,8 +474,8 @@ bool FunctionResult::operator==(const FunctionResult &that) const { std::optional FunctionResult::Characterize( const Symbol &symbol, const IntrinsicProcTable &intrinsics) { - if (const auto *obj{symbol.detailsIf()}) { - if (auto type{TypeAndShape::Characterize(*obj)}) { + if (const auto *object{symbol.detailsIf()}) { + if (auto type{TypeAndShape::Characterize(*object)}) { FunctionResult result{std::move(*type)}; CopyAttrs(symbol, result, { diff --git a/flang/lib/Evaluate/shape.cpp b/flang/lib/Evaluate/shape.cpp index eb5ec83676703..bfc2447bd3005 100644 --- a/flang/lib/Evaluate/shape.cpp +++ b/flang/lib/Evaluate/shape.cpp @@ -649,9 +649,9 @@ auto GetShapeHelper::operator()(const ProcedureRef &call) const -> Result { auto sourceElements{ GetSize(common::Clone(sourceTypeAndShape->shape()))}; auto sourceElementBytes{ - sourceTypeAndShape->type().MeasureSizeInBytes(&context_)}; + sourceTypeAndShape->MeasureSizeInBytes(&context_)}; auto moldElementBytes{ - moldTypeAndShape->type().MeasureSizeInBytes(&context_)}; + moldTypeAndShape->MeasureSizeInBytes(&context_)}; if (sourceElements && sourceElementBytes && moldElementBytes) { ExtentExpr extent{Fold(context_, ((std::move(*sourceElements) * diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 7e1d57cf579e5..74cf2f89479a9 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -74,22 +74,24 @@ static void CheckImplicitInterfaceArg( // we extend them on the right with spaces and a warning. 
static void PadShortCharacterActual(evaluate::Expr &actual, const characteristics::TypeAndShape &dummyType, - const characteristics::TypeAndShape &actualType, - parser::ContextualMessages &messages) { + characteristics::TypeAndShape &actualType, + evaluate::FoldingContext &context, parser::ContextualMessages &messages) { if (dummyType.type().category() == TypeCategory::Character && actualType.type().category() == TypeCategory::Character && dummyType.type().kind() == actualType.type().kind() && GetRank(actualType.shape()) == 0) { - if (auto dummyLEN{ToInt64(dummyType.LEN())}) { - if (auto actualLEN{ToInt64(actualType.LEN())}) { - if (*actualLEN < *dummyLEN) { - messages.Say( - "Actual length '%jd' is less than expected length '%jd'"_en_US, - *actualLEN, *dummyLEN); - auto converted{ConvertToType(dummyType.type(), std::move(actual))}; - CHECK(converted); - actual = std::move(*converted); - } + if (dummyType.LEN() && actualType.LEN()) { + auto dummyLength{ToInt64(Fold(context, common::Clone(*dummyType.LEN())))}; + auto actualLength{ + ToInt64(Fold(context, common::Clone(*actualType.LEN())))}; + if (dummyLength && actualLength && *actualLength < *dummyLength) { + messages.Say( + "Actual length '%jd' is less than expected length '%jd'"_en_US, + *actualLength, *dummyLength); + auto converted{ConvertToType(dummyType.type(), std::move(actual))}; + CHECK(converted); + actual = std::move(*converted); + actualType.set_LEN(SubscriptIntExpr{*dummyLength}); } } } @@ -142,7 +144,7 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, // Basic type & rank checking parser::ContextualMessages &messages{context.messages()}; - PadShortCharacterActual(actual, dummy.type, actualType, messages); + PadShortCharacterActual(actual, dummy.type, actualType, context, messages); ConvertIntegerActual(actual, dummy.type, actualType, messages); bool typesCompatible{dummy.type.type().IsTkCompatibleWith(actualType.type())}; if (typesCompatible) { From f78bb4d84eee55c5d3bb1f3322c1e346e3388572 Mon Sep 17 00:00:00 2001 From: Hafiz Abid Qadeer Date: Mon, 5 Oct 2020 17:28:25 -0400 Subject: [PATCH 159/321] [libc++] Check _LIBCPP_USE_CLOCK_GETTIME before using clock_gettime The clock_gettime function is available when _POSIX_TIMERS is defined. We check for this and set _LIBCPP_USE_CLOCK_GETTIME accordingly since 59b3102739c. But check for _LIBCPP_USE_CLOCK_GETTIME was removed in babd3aefc91. As a result, code is now trying to use clock_gettime even on platforms where it is not available and it is causing build failure with newlib. This patch restores the checks to fix this. 
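To show the shape of the restored logic, here is a self-contained sketch of the same feature test (illustrative only; libc++ spells it through the _LIBCPP_USE_CLOCK_GETTIME macro and also special-cases Apple platforms):

  #include <time.h>      // clock_gettime, when POSIX timers are available
  #include <sys/time.h>  // gettimeofday fallback
  #include <unistd.h>    // may define _POSIX_TIMERS

  // Current real time in microseconds since the Unix epoch.
  static long long now_usec(void) {
  #if defined(CLOCK_REALTIME) && _POSIX_TIMERS > 0
    struct timespec tp;                 // POSIX timers advertised: use them
    clock_gettime(CLOCK_REALTIME, &tp);
    return tp.tv_sec * 1000000LL + tp.tv_nsec / 1000;
  #else
    struct timeval tv;                  // portable fallback (e.g. newlib)
    gettimeofday(&tv, 0);
    return tv.tv_sec * 1000000LL + tv.tv_usec;
  #endif
  }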
Differential Revision: https://reviews.llvm.org/D88825 --- libcxx/src/chrono.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libcxx/src/chrono.cpp b/libcxx/src/chrono.cpp index f0a5d50ddf77f..c88224c61bdb5 100644 --- a/libcxx/src/chrono.cpp +++ b/libcxx/src/chrono.cpp @@ -13,11 +13,15 @@ #include "include/apple_availability.h" #if __has_include() -#include +# include +#endif + +#if __has_include() +# include // for gettimeofday and timeval #endif #if !defined(__APPLE__) && _POSIX_TIMERS > 0 -#define _LIBCPP_USE_CLOCK_GETTIME +# define _LIBCPP_USE_CLOCK_GETTIME #endif #if defined(_LIBCPP_WIN32API) @@ -27,10 +31,6 @@ # if _WIN32_WINNT >= _WIN32_WINNT_WIN8 # include # endif -#else -# if !defined(CLOCK_REALTIME) -# include // for gettimeofday and timeval -# endif // !defined(CLOCK_REALTIME) #endif // defined(_LIBCPP_WIN32API) #if defined(__ELF__) && defined(_LIBCPP_LINK_RT_LIB) @@ -74,7 +74,7 @@ system_clock::now() _NOEXCEPT static_cast<__int64>(ft.dwLowDateTime)}; return time_point(duration_cast(d - nt_to_unix_epoch)); #else -#if defined(CLOCK_REALTIME) +#if defined(CLOCK_REALTIME) && defined(_LIBCPP_USE_CLOCK_GETTIME) struct timespec tp; if (0 != clock_gettime(CLOCK_REALTIME, &tp)) __throw_system_error(errno, "clock_gettime(CLOCK_REALTIME) failed"); @@ -83,7 +83,7 @@ system_clock::now() _NOEXCEPT timeval tv; gettimeofday(&tv, 0); return time_point(seconds(tv.tv_sec) + microseconds(tv.tv_usec)); -#endif // CLOCK_REALTIME +#endif #endif } From 80cde02e85df3f1903f0a04ccccaf0f84c2854d1 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 6 Oct 2020 18:14:10 +0200 Subject: [PATCH 160/321] [MemCpyOpt] Add separate statistic for call slot optimization (NFC) --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index f14f3d4515dee..68fcf91b3464d 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -70,6 +70,7 @@ STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); +STATISTIC(NumCallSlot, "Number of call slot optimizations performed"); namespace { @@ -942,6 +943,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, LLVMContext::MD_access_group}; combineMetadata(C, cpy, KnownIDs, true); + ++NumCallSlot; return true; } From 6b441ca523cd6be9475106408a60989ad1f5ae55 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 4 Oct 2020 18:04:28 +0200 Subject: [PATCH 161/321] [MemCpyOpt] Check for throwing calls during call slot optimization When performing call slot optimization for a non-local destination, we need to check whether there may be throwing calls between the call and the copy. Otherwise, the early write to the destination may be observable by the caller. This was already done for call slot optimization of load/store, but not for memcpys. For the sake of clarity, I'm moving this check into the common optimization function, even if that does need an additional instruction scan for the load/store case. As efriedma pointed out, this check is not sufficient due to potential accesses from another thread. This case is left as a TODO. 
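A hypothetical source-level analogue of the hazard (all names here are invented for illustration):

  #include <cstring>

  struct Buf { char bytes[16]; };
  void produce(Buf *out);   // assumed to initialize *out
  void may_throw();         // assumed to potentially unwind

  void f(Buf *dest) {       // *dest is visible to f's caller
    Buf tmp;
    produce(&tmp);          // call slot opt would have produce() write *dest
    may_throw();            // ...but if this unwinds after the rewrite, the
                            // caller observes a write to *dest that the
                            // original program never performed
    std::memcpy(dest, &tmp, sizeof tmp);  // originally the sole write to *dest
  }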
Differential Revision: https://reviews.llvm.org/D88799 --- .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 5 +- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 50 ++++++++++++------- llvm/test/Transforms/MemCpyOpt/callslot.ll | 5 +- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 5426482ff5263..877aa40e1a3af 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -61,8 +61,9 @@ class MemCpyOptPass : public PassInfoMixin { bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI); bool processMemMove(MemMoveInst *M); - bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc, - uint64_t cpyLen, Align cpyAlign, CallInst *C); + bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore, + Value *cpyDst, Value *cpySrc, uint64_t cpyLen, + Align cpyAlign, CallInst *C); bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet); bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 68fcf91b3464d..49f76d37ec0d9 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -659,8 +659,6 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (C) { // Check that nothing touches the dest of the "copy" between // the call and the store. - Value *CpyDest = SI->getPointerOperand()->stripPointerCasts(); - bool CpyDestIsLocal = isa(CpyDest); MemoryLocation StoreLoc = MemoryLocation::get(SI); for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator(); I != E; --I) { @@ -668,18 +666,12 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { C = nullptr; break; } - // The store to dest may never happen if an exception can be thrown - // between the load and the store. - if (I->mayThrow() && !CpyDestIsLocal) { - C = nullptr; - break; - } } } if (C) { bool changed = performCallSlotOptzn( - LI, SI->getPointerOperand()->stripPointerCasts(), + LI, SI, SI->getPointerOperand()->stripPointerCasts(), LI->getPointerOperand()->stripPointerCasts(), DL.getTypeStoreSize(SI->getOperand(0)->getType()), commonAlignment(SI->getAlign(), LI->getAlign()), C); @@ -754,7 +746,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { /// Takes a memcpy and a call that it depends on, /// and checks for the possibility of a call slot optimization by having /// the call write its result directly into the destination of the memcpy. 
-bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, +bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, + Instruction *cpyStore, Value *cpyDest, Value *cpySrc, uint64_t cpyLen, Align cpyAlign, CallInst *C) { // The general transformation to keep in mind is @@ -785,7 +778,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, if (!srcArraySize) return false; - const DataLayout &DL = cpy->getModule()->getDataLayout(); + const DataLayout &DL = cpyLoad->getModule()->getDataLayout(); uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) * srcArraySize->getZExtValue(); @@ -795,6 +788,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. + // TODO: Use isDereferenceablePointer() API instead. if (AllocaInst *A = dyn_cast(cpyDest)) { // The destination is an alloca. Check it is larger than srcSize. ConstantInt *destArraySize = dyn_cast(A->getArraySize()); @@ -807,10 +801,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, if (destSize < srcSize) return false; } else if (Argument *A = dyn_cast(cpyDest)) { - // The store to dest may never happen if the call can throw. - if (C->mayThrow()) - return false; - if (A->getDereferenceableBytes() < srcSize) { // If the destination is an sret parameter then only accesses that are // outside of the returned struct type can trap. @@ -833,6 +823,30 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, return false; } + // Make sure that nothing can observe cpyDest being written early. There are + // a number of cases to consider: + // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of + // the transform. + // 2. C itself may not access cpyDest (prior to the transform). This is + // checked further below. + // 3. If cpyDest is accessible to the caller of this function (potentially + // captured and not based on an alloca), we need to ensure that we cannot + // unwind between C and cpyStore. This is checked here. + // 4. If cpyDest is potentially captured, there may be accesses to it from + // another thread. In this case, we need to check that cpyStore is + // guaranteed to be executed if C is. As it is a non-atomic access, it + // renders accesses from other threads undefined. + // TODO: This is currently not checked. + if (!isa(cpyDest)) { + assert(C->getParent() == cpyStore->getParent() && + "call and copy must be in the same block"); + for (const Instruction &I : make_range(C->getIterator(), + cpyStore->getIterator())) { + if (I.mayThrow()) + return false; + } + } + // Check that dest points to memory that is at least as aligned as src. 
Align srcAlign = srcAlloca->getAlign(); bool isDestSufficientlyAligned = srcAlign <= cpyAlign; @@ -867,7 +881,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, if (IT->isLifetimeStartOrEnd()) continue; - if (U != C && U != cpy) + if (U != C && U != cpyLoad) return false; } @@ -941,7 +955,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, LLVMContext::MD_noalias, LLVMContext::MD_invariant_group, LLVMContext::MD_access_group}; - combineMetadata(C, cpy, KnownIDs, true); + combineMetadata(C, cpyLoad, KnownIDs, true); ++NumCallSlot; return true; @@ -1242,7 +1256,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { // of conservatively taking the minimum? Align Alignment = std::min(M->getDestAlign().valueOrOne(), M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn(M, M->getDest(), M->getSource(), + if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), CopySize->getZExtValue(), Alignment, C)) { eraseInstruction(M); ++NumMemCpyInstr; diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll index a4cfd53f4d249..3b495e5f3fa7f 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -91,10 +91,9 @@ define void @throw_between_call_and_mempy(i8* dereferenceable(16) %dest.i8) { ; CHECK-LABEL: @throw_between_call_and_mempy( ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* -; CHECK-NEXT: [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8:%.*]] to [16 x i8]* -; CHECK-NEXT: [[DEST_I812:%.*]] = bitcast [16 x i8]* [[DEST_I81]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST_I812]], i8 0, i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false) ; CHECK-NEXT: call void @may_throw() [[ATTR2:#.*]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST_I8:%.*]], i8 0, i64 16, i1 false) ; CHECK-NEXT: ret void ; %src = alloca [16 x i8] From 616f5450480214d40dd69a5f5f0f10b41bd4b3e2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 4 Oct 2020 21:03:02 +0200 Subject: [PATCH 162/321] [MemCpyOpt] Use dereferenceable pointer helper The call slot optimization has some home-grown code for checking whether the destination is dereferenceable. Replace this with the generic isDereferenceableAndAlignedPointer() helper. I'm not checking alignment here, because that is currently handled separately and may be an enforced alignment for allocas. The clean way of integrating that part would probably be to accept a callback in isDereferenceableAndAlignedPointer() for the actual isAligned check, which would then have a chance to use an enforced alignment instead. This allows the destination to be a GEP (among other things), though the two open TODOs may prevent it from working in practice. 
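For reference, the whole hand-rolled check collapses to one query (excerpted from the change below, with annotations added):

  // The transform must not introduce a trap: the first cpyLen bytes of the
  // destination have to be known dereferenceable at the call site C.
  // Align(1) is deliberate; alignment is still handled separately, where an
  // enforced alloca alignment can be taken into account.
  if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1),
                                          APInt(64, cpyLen), DL, C, DT))
    return false;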
Differential Revision: https://reviews.llvm.org/D88805 --- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 38 +++---------------- llvm/test/Transforms/MemCpyOpt/callslot.ll | 5 ++- 2 files changed, 8 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 49f76d37ec0d9..64821c70217bc 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemorySSA.h" @@ -788,40 +789,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. - // TODO: Use isDereferenceablePointer() API instead. - if (AllocaInst *A = dyn_cast(cpyDest)) { - // The destination is an alloca. Check it is larger than srcSize. - ConstantInt *destArraySize = dyn_cast(A->getArraySize()); - if (!destArraySize) - return false; - - uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) * - destArraySize->getZExtValue(); - - if (destSize < srcSize) - return false; - } else if (Argument *A = dyn_cast(cpyDest)) { - if (A->getDereferenceableBytes() < srcSize) { - // If the destination is an sret parameter then only accesses that are - // outside of the returned struct type can trap. - if (!A->hasStructRetAttr()) - return false; - - Type *StructTy = A->getParamStructRetType(); - if (!StructTy->isSized()) { - // The call may never return and hence the copy-instruction may never - // be executed, and therefore it's not safe to say "the destination - // has at least bytes, as implied by the copy-instruction", - return false; - } - - uint64_t destSize = DL.getTypeAllocSize(StructTy); - if (destSize < srcSize) - return false; - } - } else { + if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen), + DL, C, DT)) return false; - } // Make sure that nothing can observe cpyDest being written early. There are // a number of cases to consider: @@ -837,6 +807,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // guaranteed to be executed if C is. As it is a non-atomic access, it // renders accesses from other threads undefined. // TODO: This is currently not checked. + // TODO: Check underlying object, so we can look through GEPs. if (!isa(cpyDest)) { assert(C->getParent() == cpyStore->getParent() && "call and copy must be in the same block"); @@ -893,6 +864,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. + // TODO: Support moving instructions like GEPs upwards. 
if (Instruction *cpyDestInst = dyn_cast(cpyDest)) if (!DT->dominates(cpyDestInst, C)) return false; diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll index 3b495e5f3fa7f..90f1833a2d5cc 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -110,8 +110,9 @@ define void @dest_is_gep_nounwind_call() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 1 ; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [8 x i8]* [[SRC]] to i8* ; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8 -; CHECK-NEXT: call void @accept_ptr(i8* [[SRC_I8]]) [[ATTR3:#.*]] -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 8, i1 false) +; CHECK-NEXT: [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8]] to [8 x i8]* +; CHECK-NEXT: [[DEST_I812:%.*]] = bitcast [8 x i8]* [[DEST_I81]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I812]]) [[ATTR3:#.*]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] From b4264210f2b24b8fb40247f34decd4e14174559e Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Tue, 6 Oct 2020 17:08:34 +0200 Subject: [PATCH 163/321] [AMDGPU] Remove SIInstrInfo::calculateLDSSpillAddress This function does not seem to be used anymore. Differential Revision: https://reviews.llvm.org/D88904 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 95 -------------------------- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 4 -- 2 files changed, 99 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index abf6869fee152..86eb594489a93 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1488,101 +1488,6 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addMemOperand(MMO); } -/// \param @Offset Offset in bytes of the FrameIndex being spilled -unsigned SIInstrInfo::calculateLDSSpillAddress( - MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg, - unsigned FrameOffset, unsigned Size) const { - MachineFunction *MF = MBB.getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo(); - const DebugLoc &DL = MBB.findDebugLoc(MI); - unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize(); - unsigned WavefrontSize = ST.getWavefrontSize(); - - Register TIDReg = MFI->getTIDReg(); - if (!MFI->hasCalculatedTID()) { - MachineBasicBlock &Entry = MBB.getParent()->front(); - MachineBasicBlock::iterator Insert = Entry.front(); - const DebugLoc &DL = Insert->getDebugLoc(); - - TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass, - *MF); - if (TIDReg == AMDGPU::NoRegister) - return TIDReg; - - if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) && - WorkGroupSize > WavefrontSize) { - Register TIDIGXReg = - MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X); - Register TIDIGYReg = - MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); - Register TIDIGZReg = - MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); - Register InputPtrReg = - MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR); - for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { - if (!Entry.isLiveIn(Reg)) - Entry.addLiveIn(Reg); - } - - RS->enterBasicBlock(Entry); - // FIXME: Can we scavenge an SReg_64 and access the subregs? 
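The helper is small enough to quote in full (comments beyond the first line are mine):

  // Canonicalize shuffle input op to the requested type.
  // Today this is only a bitcast; once inputs narrower than VT are accepted,
  // the padding will happen here, in exactly one place.
  auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) {
    return DAG.getBitcast(VT, Op);
  };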
- Register STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); - Register STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); - BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0) - .addReg(InputPtrReg) - .addImm(SI::KernelInputOffsets::NGROUPS_Z); - BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1) - .addReg(InputPtrReg) - .addImm(SI::KernelInputOffsets::NGROUPS_Y); - - // NGROUPS.X * NGROUPS.Y - BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1) - .addReg(STmp1) - .addReg(STmp0); - // (NGROUPS.X * NGROUPS.Y) * TIDIG.X - BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg) - .addReg(STmp1) - .addReg(TIDIGXReg); - // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X) - BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg) - .addReg(STmp0) - .addReg(TIDIGYReg) - .addReg(TIDReg); - // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z - getAddNoCarry(Entry, Insert, DL, TIDReg) - .addReg(TIDReg) - .addReg(TIDIGZReg) - .addImm(0); // clamp bit - } else { - // Get the wave id - BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64), - TIDReg) - .addImm(-1) - .addImm(0); - - BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64), - TIDReg) - .addImm(-1) - .addReg(TIDReg); - } - - BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32), - TIDReg) - .addImm(2) - .addReg(TIDReg); - MFI->setTIDReg(TIDReg); - } - - // Add FrameIndex to LDS offset - unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize); - getAddNoCarry(MBB, MI, DL, TmpReg) - .addImm(LDSOffset) - .addReg(TIDReg) - .addImm(0); // clamp bit - - return TmpReg; -} - void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Count) const { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 8ce41c4ba8b3a..08bf3d27c74dd 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -201,10 +201,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; - unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI, - RegScavenger *RS, unsigned TmpReg, - unsigned Offset, unsigned Size) const; - void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, From 6c7d713cf5d9bb188f1e73452a256386f0288bf7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 6 Oct 2020 17:32:35 +0100 Subject: [PATCH 164/321] [X86][SSE] combineX86ShuffleChain add 'CanonicalizeShuffleInput' helper. NFCI. As part of PR45974, we're getting closer to not creating 'padded' vectors on-the-fly in combineX86ShufflesRecursively, and only pad the source inputs if we have a definite match inside combineX86ShuffleChain. At the moment combineX86ShuffleChain just has to bitcast an input to the correct shuffle type, but eventually we'll need to pad them as well. So, move the bitcast into a 'CanonicalizeShuffleInput helper for now, making the diff for future padding support a lot smaller. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 100 +++++++++++++----------- 1 file changed, 54 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bd80812d5b101..66986a1b9c108 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35013,6 +35013,12 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, unsigned RootSizeInBits = RootVT.getSizeInBits(); unsigned NumRootElts = RootVT.getVectorNumElements(); + // Canonicalize shuffle input op to the requested type. + // TODO: Support cases where Op is smaller than VT. + auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) { + return DAG.getBitcast(VT, Op); + }; + // Find the inputs that enter the chain. Note that multiple uses are OK // here, we're not going to remove the operands we find. bool UnaryShuffle = (Inputs.size() == 1); @@ -35031,7 +35037,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, unsigned NumBaseMaskElts = BaseMask.size(); if (NumBaseMaskElts == 1) { assert(BaseMask[0] == 0 && "Invalid shuffle index found!"); - return DAG.getBitcast(RootVT, V1); + return CanonicalizeShuffleInput(RootVT, V1); } bool OptForSize = DAG.shouldOptForSize(); @@ -35055,8 +35061,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // we can just use the broadcast directly. This works for smaller broadcast // elements as well as they already repeat across each mask element if (UnaryShuffle && isTargetShuffleSplat(V1) && !isAnyZero(BaseMask) && - (BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0) { - return DAG.getBitcast(RootVT, V1); + (BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && + V1.getValueSizeInBits() >= RootSizeInBits) { + return CanonicalizeShuffleInput(RootVT, V1); } // Attempt to match a subvector broadcast. @@ -35089,7 +35096,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return SDValue(); // Nothing to do! assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) && "Unexpected lane shuffle"); - Res = DAG.getBitcast(ShuffleVT, V1); + Res = CanonicalizeShuffleInput(ShuffleVT, V1); unsigned SubIdx = BaseMask[0] * (8 / NumBaseMaskElts); bool UseZero = isAnyZero(BaseMask); Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits); @@ -35103,8 +35110,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, Mask); // Try to lower to vshuf64x2/vshuf32x4. - auto MatchSHUF128 = [](MVT ShuffleVT, const SDLoc &DL, ArrayRef Mask, - SDValue V1, SDValue V2, SelectionDAG &DAG) { + auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL, ArrayRef Mask, + SDValue V1, SDValue V2, SelectionDAG &DAG) { unsigned PermMask = 0; // Insure elements came from the same Op. SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)}; @@ -35127,8 +35134,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT, - DAG.getBitcast(ShuffleVT, Ops[0]), - DAG.getBitcast(ShuffleVT, Ops[1]), + CanonicalizeShuffleInput(ShuffleVT, Ops[0]), + CanonicalizeShuffleInput(ShuffleVT, Ops[1]), DAG.getTargetConstant(PermMask, DL, MVT::i8)); }; @@ -35161,7 +35168,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) return SDValue(); // Nothing to do! 
assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle"); - Res = DAG.getBitcast(ShuffleVT, V1); + Res = CanonicalizeShuffleInput(ShuffleVT, V1); Res = extract128BitVector(Res, BaseMask[0] * 2, DAG, DL); Res = widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG, DL, 256); @@ -35181,7 +35188,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0); PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4); - Res = DAG.getBitcast(ShuffleVT, V1); + Res = CanonicalizeShuffleInput(ShuffleVT, V1); Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res, DAG.getUNDEF(ShuffleVT), DAG.getTargetConstant(PermMask, DL, MVT::i8)); @@ -35202,11 +35209,12 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, PermMask |= ((BaseMask[0] & 3) << 0); PermMask |= ((BaseMask[1] & 3) << 4); - Res = DAG.getNode( - X86ISD::VPERM2X128, DL, ShuffleVT, - DAG.getBitcast(ShuffleVT, isInRange(BaseMask[0], 0, 2) ? V1 : V2), - DAG.getBitcast(ShuffleVT, isInRange(BaseMask[1], 0, 2) ? V1 : V2), - DAG.getTargetConstant(PermMask, DL, MVT::i8)); + SDValue LHS = isInRange(BaseMask[0], 0, 2) ? V1 : V2; + SDValue RHS = isInRange(BaseMask[1], 0, 2) ? V1 : V2; + Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, + CanonicalizeShuffleInput(ShuffleVT, LHS), + CanonicalizeShuffleInput(ShuffleVT, RHS), + DAG.getTargetConstant(PermMask, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } } @@ -35282,7 +35290,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (Subtarget.hasAVX2()) { if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) return SDValue(); // Nothing to do! - Res = DAG.getBitcast(MaskVT, V1); + Res = CanonicalizeShuffleInput(MaskVT, V1); Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); return DAG.getBitcast(RootVT, Res); } @@ -35297,7 +35305,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - Res = DAG.getBitcast(ShuffleSrcVT, NewV1); + Res = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); return DAG.getBitcast(RootVT, Res); } @@ -35309,7 +35317,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - Res = DAG.getBitcast(ShuffleVT, V1); + Res = CanonicalizeShuffleInput(ShuffleVT, V1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); @@ -35330,8 +35338,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS) return SDValue(); // Nothing to do! Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, - DAG.getBitcast(MVT::v4f32, SrcV1), - DAG.getBitcast(MVT::v4f32, SrcV2), + CanonicalizeShuffleInput(MVT::v4f32, SrcV1), + CanonicalizeShuffleInput(MVT::v4f32, SrcV2), DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -35344,8 +35352,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return SDValue(); // Nothing to do! 
PermuteImm = (/*DstIdx*/2 << 4) | (/*SrcIdx*/0 << 0); Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, - DAG.getBitcast(MVT::v4f32, V1), - DAG.getBitcast(MVT::v4f32, V2), + CanonicalizeShuffleInput(MVT::v4f32, V1), + CanonicalizeShuffleInput(MVT::v4f32, V2), DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -35359,8 +35367,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1); - NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2); + NewV1 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1); + NewV2 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV2); Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2); return DAG.getBitcast(RootVT, Res); } @@ -35373,8 +35381,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - NewV1 = DAG.getBitcast(ShuffleVT, NewV1); - NewV2 = DAG.getBitcast(ShuffleVT, NewV2); + NewV1 = CanonicalizeShuffleInput(ShuffleVT, NewV1); + NewV2 = CanonicalizeShuffleInput(ShuffleVT, NewV2); Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); @@ -35391,7 +35399,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, Zeroable)) { if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI) return SDValue(); // Nothing to do! - V1 = DAG.getBitcast(IntMaskVT, V1); + V1 = CanonicalizeShuffleInput(IntMaskVT, V1); Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, DAG.getTargetConstant(BitLen, DL, MVT::i8), DAG.getTargetConstant(BitIdx, DL, MVT::i8)); @@ -35401,8 +35409,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI) return SDValue(); // Nothing to do! - V1 = DAG.getBitcast(IntMaskVT, V1); - V2 = DAG.getBitcast(IntMaskVT, V2); + V1 = CanonicalizeShuffleInput(IntMaskVT, V1); + V2 = CanonicalizeShuffleInput(IntMaskVT, V2); Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, DAG.getTargetConstant(BitLen, DL, MVT::i8), DAG.getTargetConstant(BitIdx, DL, MVT::i8)); @@ -35421,7 +35429,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, IsTRUNCATE ? (unsigned)ISD::TRUNCATE : (unsigned)X86ISD::VTRUNC; if (Depth == 0 && Root.getOpcode() == Opc) return SDValue(); // Nothing to do! - V1 = DAG.getBitcast(ShuffleSrcVT, V1); + V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1); Res = DAG.getNode(Opc, DL, ShuffleVT, V1); if (ShuffleVT.getSizeInBits() < RootSizeInBits) Res = widenSubVector(Res, true, Subtarget, DAG, DL, RootSizeInBits); @@ -35438,8 +35446,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return SDValue(); // Nothing to do! 
ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2); ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2); - V1 = DAG.getBitcast(ShuffleSrcVT, V1); - V2 = DAG.getBitcast(ShuffleSrcVT, V2); + V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1); + V2 = CanonicalizeShuffleInput(ShuffleSrcVT, V2); ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2); ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts); Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShuffleSrcVT, V1, V2); @@ -35468,7 +35476,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (Subtarget.hasAVX2() && (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) { SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); - Res = DAG.getBitcast(MaskVT, V1); + Res = CanonicalizeShuffleInput(MaskVT, V1); Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res); return DAG.getBitcast(RootVT, Res); } @@ -35480,7 +35488,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || (Subtarget.hasVBMI() && (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8))) { - V1 = DAG.getBitcast(MaskVT, V1); + V1 = CanonicalizeShuffleInput(MaskVT, V1); V2 = DAG.getUNDEF(MaskVT); Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); return DAG.getBitcast(RootVT, Res); @@ -35503,7 +35511,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, for (unsigned i = 0; i != NumMaskElts; ++i) if (Mask[i] == SM_SentinelZero) Mask[i] = NumMaskElts + i; - V1 = DAG.getBitcast(MaskVT, V1); + V1 = CanonicalizeShuffleInput(MaskVT, V1); V2 = getZeroVector(MaskVT, Subtarget, DAG, DL); Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); return DAG.getBitcast(RootVT, Res); @@ -35528,8 +35536,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || (Subtarget.hasVBMI() && AllowBWIVPERMV3 && (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { - V1 = DAG.getBitcast(MaskVT, V1); - V2 = DAG.getBitcast(MaskVT, V2); + V1 = CanonicalizeShuffleInput(MaskVT, V1); + V2 = CanonicalizeShuffleInput(MaskVT, V2); Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); return DAG.getBitcast(RootVT, Res); } @@ -35556,7 +35564,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, EltBits[i] = AllOnes; } SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL); - Res = DAG.getBitcast(MaskVT, V1); + Res = CanonicalizeShuffleInput(MaskVT, V1); unsigned AndOpcode = MaskVT.isFloatingPoint() ? unsigned(X86ISD::FAND) : unsigned(ISD::AND); Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask); @@ -35576,7 +35584,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, VPermIdx.push_back(Idx); } SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx); - Res = DAG.getBitcast(MaskVT, V1); + Res = CanonicalizeShuffleInput(MaskVT, V1); Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask); return DAG.getBitcast(RootVT, Res); } @@ -35608,8 +35616,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, Index = (MaskVT.getScalarSizeInBits() == 64 ? 
Index << 1 : Index); VPerm2Idx.push_back(Index); } - V1 = DAG.getBitcast(MaskVT, V1); - V2 = DAG.getBitcast(MaskVT, V2); + V1 = CanonicalizeShuffleInput(MaskVT, V1); + V2 = CanonicalizeShuffleInput(MaskVT, V2); SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, DAG.getTargetConstant(M2ZImm, DL, MVT::i8)); @@ -35643,7 +35651,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8)); } MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes); - Res = DAG.getBitcast(ByteVT, V1); + Res = CanonicalizeShuffleInput(ByteVT, V1); SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask); Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp); return DAG.getBitcast(RootVT, Res); @@ -35673,8 +35681,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8)); } MVT ByteVT = MVT::v16i8; - V1 = DAG.getBitcast(ByteVT, V1); - V2 = DAG.getBitcast(ByteVT, V2); + V1 = CanonicalizeShuffleInput(ByteVT, V1); + V2 = CanonicalizeShuffleInput(ByteVT, V2); SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask); Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp); return DAG.getBitcast(RootVT, Res); @@ -35700,8 +35708,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || (Subtarget.hasVBMI() && AllowBWIVPERMV3 && (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { - V1 = DAG.getBitcast(MaskVT, V1); - V2 = DAG.getBitcast(MaskVT, V2); + V1 = CanonicalizeShuffleInput(MaskVT, V1); + V2 = CanonicalizeShuffleInput(MaskVT, V2); Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); return DAG.getBitcast(RootVT, Res); } From c781dc74a8b282eb4c6f3aa48982c5de898611a2 Mon Sep 17 00:00:00 2001 From: Fanbo Meng Date: Tue, 6 Oct 2020 13:13:01 -0400 Subject: [PATCH 165/321] [SystemZ][z/OS] Set default alignment rules for z/OS target Set the default alignment control variables for z/OS target and add test case for alignment rules on z/OS. 
Reviewed By: abhina.sreeskantharajan Differential Revision: https://reviews.llvm.org/D88845 --- clang/lib/Basic/Targets/OSTargets.h | 5 ++ clang/test/CodeGen/zos-alignment.c | 129 ++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 clang/test/CodeGen/zos-alignment.c diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 9b96690f413cb..7445dbe727943 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -779,6 +779,11 @@ class LLVM_LIBRARY_VISIBILITY ZOSTargetInfo : public OSTargetInfo { ZOSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->WCharType = TargetInfo::UnsignedInt; + this->UseBitFieldTypeAlignment = false; + this->UseZeroLengthBitfieldAlignment = true; + this->ZeroLengthBitfieldBoundary = 32; + this->MinGlobalAlign = 0; + this->DefaultAlignForAttributeAligned = 128; } }; diff --git a/clang/test/CodeGen/zos-alignment.c b/clang/test/CodeGen/zos-alignment.c new file mode 100644 index 0000000000000..5a72096a56f6a --- /dev/null +++ b/clang/test/CodeGen/zos-alignment.c @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -emit-llvm -triple s390x-none-zos -fdump-record-layouts %s -o - | FileCheck %s + +struct s0 { + short a:3; + long b:5; + int c:1; + long d:10; + char e:5; +} S0; +// CHECK: 0 | struct s0 +// CHECK-NEXT: 0:0-2 | short a +// CHECK-NEXT: 0:3-7 | long b +// CHECK-NEXT: 1:0-0 | int c +// CHECK-NEXT: 1:1-10 | long d +// CHECK-NEXT: 2:3-7 | char e +// CHECK-NEXT: | [sizeof=3, align=1] + +struct s1 { + char a:7; + long b:27; + int c:2; +} S1; +// CHECK: 0 | struct s1 +// CHECK-NEXT: 0:0-6 | char a +// CHECK-NEXT: 0:7-33 | long b +// CHECK-NEXT: 4:2-3 | int c +// CHECK-NEXT: | [sizeof=5, align=1] + +struct s2 { + char a:7; + char :0; + short :0; + short :0; +} S2; +// CHECK: 0 | struct s2 +// CHECK-NEXT: 0:0-6 | char a +// CHECK-NEXT: 4:- | char +// CHECK-NEXT: 4:- | short +// CHECK-NEXT: 4:- | short +// CHECK-NEXT: | [sizeof=4, align=4] + +struct s3 { + int a; + int b:16; + char :0; + char c:1; +} S3; +// CHECK: 0 | struct s3 +// CHECK-NEXT: 0 | int a +// CHECK-NEXT: 4:0-15 | int b +// CHECK-NEXT: 8:- | char +// CHECK-NEXT: 8:0-0 | char c +// CHECK-NEXT: | [sizeof=12, align=4] + +struct s4 { + unsigned int __attribute__((aligned(32))) a; +} S4; +// CHECK: 0 | struct s4 +// CHECK-NEXT: 0 | unsigned int a +// CHECK-NEXT: | [sizeof=32, align=32] + +struct s5 { + char a; + int b:19 __attribute__((aligned(4))); + int c:22 __attribute__((aligned(8))); + int :0; + int d:10; +} S5; +// CHECK: 0 | struct s5 +// CHECK-NEXT: 0 | char a +// CHECK-NEXT: 4:0-18 | int b +// CHECK-NEXT: 8:0-21 | int c +// CHECK-NEXT: 12:- | int +// CHECK-NEXT: 12:0-9 | int d +// CHECK-NEXT: | [sizeof=16, align=8] + +struct s6 { + char * a; + char * b[]; +} S6; +// CHECK: 0 | struct s6 +// CHECK-NEXT: 0 | char * a +// CHECK-NEXT: 8 | char *[] b +// CHECK-NEXT: | [sizeof=8, align=8] + +struct s10 { + unsigned int __attribute__((aligned)) a; +} S10; +// CHECK: 0 | struct s10 +// CHECK-NEXT: 0 | unsigned int a +// CHECK-NEXT: | [sizeof=16, align=16] + +union u0 { + unsigned short d1 __attribute__((packed)); + int d2:10; + long d3; +} U0 __attribute__((aligned(8))); +// CHECK: 0 | union u0 +// CHECK-NEXT: 0 | unsigned short d1 +// CHECK-NEXT: 0:0-9 | int d2 +// CHECK-NEXT: 0 | long d3 +// CHECK-NEXT: | [sizeof=8, align=8] + +union u1 { + unsigned int :0; + short a; +} U1; +// CHECK: 0 | union u1 +// CHECK-NEXT: 0:- | unsigned int +// CHECK-NEXT: 0 | short a +// 
CHECK-NEXT:  | [sizeof=4, align=4]
+
+union u2 {
+  long :0;
+  short a;
+} U2;
+// CHECK: 0 | union u2
+// CHECK-NEXT: 0:- | long
+// CHECK-NEXT: 0 | short a
+// CHECK-NEXT:  | [sizeof=8, align=8]
+
+union u3 {
+  unsigned char :0;
+  unsigned short :0;
+} U3;
+// CHECK: 0 | union u3
+// CHECK-NEXT: 0:- | unsigned char
+// CHECK-NEXT: 0:- | unsigned short
+// CHECK-NEXT:  | [sizeof=0, align=4]

From acce6b6082684d601e0375818260226259d96b7a Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Tue, 6 Oct 2020 10:05:43 -0700
Subject: [PATCH 166/321] [AMDGPU] Create isGFX9Plus utility function

Introduce a utility function to make it more convenient to write code that
is the same on the GFX9 and GFX10 subtargets.

Use isGFX9Plus in the AsmParser for AMDGPU.

Authored By: Joe_Nash

Differential Revision: https://reviews.llvm.org/D88908
---
 llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 6 +++++-
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp      | 4 ++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h        | 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index fae814a7871d4..9e5c7b828c901 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1190,6 +1190,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
     return AMDGPU::isGFX9(getSTI());
   }

+  bool isGFX9Plus() const {
+    return AMDGPU::isGFX9Plus(getSTI());
+  }
+
   bool isGFX10() const {
     return AMDGPU::isGFX10(getSTI());
   }
@@ -4699,7 +4703,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
        R.isValid(); ++R) {
     if (*R == RegNo)
-      return isGFX9() || isGFX10();
+      return isGFX9Plus();
   }

   // GFX10 has 2 more SGPRs 104 and 105.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 92cbbf336f937..ae0ff43a80a47 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1078,6 +1078,10 @@ bool isGFX9(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
 }

+bool isGFX9Plus(const MCSubtargetInfo &STI) {
+  return isGFX9(STI) || isGFX10(STI);
+}
+
 bool isGFX10(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
 }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index c5feadb98f13e..e652603bee550 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -591,6 +591,7 @@ bool isSI(const MCSubtargetInfo &STI);
 bool isCI(const MCSubtargetInfo &STI);
 bool isVI(const MCSubtargetInfo &STI);
 bool isGFX9(const MCSubtargetInfo &STI);
+bool isGFX9Plus(const MCSubtargetInfo &STI);
 bool isGFX10(const MCSubtargetInfo &STI);
 bool isGCN3Encoding(const MCSubtargetInfo &STI);
 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);

From 92e83afe44fbfd81ffd428bb41b7f760eee712f9 Mon Sep 17 00:00:00 2001
From: Thomas Raoux
Date: Tue, 6 Oct 2020 09:56:35 -0700
Subject: [PATCH 167/321] [mlir][vector] Fold extractOp coming from broadcastOp

Combine an ExtractOp that has a scalar result with its BroadcastOp source.
This is useful for incrementally converting degenerate one-element vectors
into scalars.
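As a standalone illustration of the core rewrite (a sketch with
illustrative values, not the exact upstream code): extracting from a
broadcast only needs the trailing position indices, since the leading
indices address dimensions created by the broadcast itself.

  // e.g. %r = vector.extract %b[0, 1, 2] where %b broadcasts vector<4xf32>
  SmallVector<int64_t, 4> extractPos = {0, 1, 2};
  unsigned broadcastSrcRank = 1;  // source is vector<4xf32>
  unsigned extractResultRank = 0; // result is a scalar
  unsigned rankDiff = broadcastSrcRank - extractResultRank;
  // Drop the leading indices; only the last rankDiff indices address the
  // original source, so the fold becomes vector.extract %a[2].
  extractPos.erase(extractPos.begin(),
                   std::next(extractPos.begin(), extractPos.size() - rankDiff));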
Differential Revision: https://reviews.llvm.org/D88751 --- mlir/lib/Dialect/Vector/VectorOps.cpp | 33 +++++++++++++++ mlir/test/Dialect/Vector/canonicalize.mlir | 48 ++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 672ad4058309a..b71102cde1cf6 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -812,6 +812,37 @@ static Value foldExtractOpFromInsertChainAndTranspose(ExtractOp extractOp) { return Value(); } +/// Fold extractOp with scalar result coming from BroadcastOp. +static Value foldExtractFromBroadcast(ExtractOp extractOp) { + auto broadcastOp = extractOp.vector().getDefiningOp(); + if (!broadcastOp) + return Value(); + if (extractOp.getType() == broadcastOp.getSourceType()) + return broadcastOp.source(); + auto getRank = [](Type type) { + return type.isa() ? type.cast().getRank() : 0; + }; + unsigned broadcasrSrcRank = getRank(broadcastOp.getSourceType()); + unsigned extractResultRank = getRank(extractOp.getType()); + if (extractResultRank < broadcasrSrcRank) { + auto extractPos = extractVector(extractOp.position()); + unsigned rankDiff = broadcasrSrcRank - extractResultRank; + extractPos.erase( + extractPos.begin(), + std::next(extractPos.begin(), extractPos.size() - rankDiff)); + extractOp.setOperand(broadcastOp.source()); + // OpBuilder is only used as a helper to build an I64ArrayAttr. + OpBuilder b(extractOp.getContext()); + extractOp.setAttr(ExtractOp::getPositionAttrName(), + b.getI64ArrayAttr(extractPos)); + return extractOp.getResult(); + } + // TODO: In case the rank of the broadcast source is greater than the rank of + // the extract result this can be combined into a new broadcast op. This needs + // to be added a canonicalization pattern if needed. 
+ return Value(); +} + OpFoldResult ExtractOp::fold(ArrayRef) { if (succeeded(foldExtractOpFromExtractChain(*this))) return getResult(); @@ -819,6 +850,8 @@ OpFoldResult ExtractOp::fold(ArrayRef) { return getResult(); if (auto val = foldExtractOpFromInsertChainAndTranspose(*this)) return val; + if (auto val = foldExtractFromBroadcast(*this)) + return val; return OpFoldResult(); } diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 9c36f7684baf9..2f927a1bbc810 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -348,6 +348,54 @@ func @fold_extract_transpose( // ----- +// CHECK-LABEL: fold_extract_broadcast +// CHECK-SAME: %[[A:.*]]: f32 +// CHECK: return %[[A]] : f32 +func @fold_extract_broadcast(%a : f32) -> f32 { + %b = vector.broadcast %a : f32 to vector<1x2x4xf32> + %r = vector.extract %b[0, 1, 2] : vector<1x2x4xf32> + return %r : f32 +} + +// ----- + +// CHECK-LABEL: fold_extract_broadcast_vector +// CHECK-SAME: %[[A:.*]]: vector<4xf32> +// CHECK: return %[[A]] : vector<4xf32> +func @fold_extract_broadcast_vector(%a : vector<4xf32>) -> vector<4xf32> { + %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32> + %r = vector.extract %b[0, 1] : vector<1x2x4xf32> + return %r : vector<4xf32> +} + +// ----- + +// CHECK-LABEL: fold_extract_broadcast +// CHECK-SAME: %[[A:.*]]: vector<4xf32> +// CHECK: %[[R:.*]] = vector.extract %[[A]][2] : vector<4xf32> +// CHECK: return %[[R]] : f32 +func @fold_extract_broadcast(%a : vector<4xf32>) -> f32 { + %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32> + %r = vector.extract %b[0, 1, 2] : vector<1x2x4xf32> + return %r : f32 +} + +// ----- + +// Negative test for extract_op folding when the type of broadcast source +// doesn't match the type of vector.extract. +// CHECK-LABEL: fold_extract_broadcast_negative +// CHECK: %[[B:.*]] = vector.broadcast %{{.*}} : f32 to vector<1x2x4xf32> +// CHECK: %[[R:.*]] = vector.extract %[[B]][0, 1] : vector<1x2x4xf32> +// CHECK: return %[[R]] : vector<4xf32> +func @fold_extract_broadcast_negative(%a : f32) -> vector<4xf32> { + %b = vector.broadcast %a : f32 to vector<1x2x4xf32> + %r = vector.extract %b[0, 1] : vector<1x2x4xf32> + return %r : vector<4xf32> +} + +// ----- + // CHECK-LABEL: fold_vector_transfers func @fold_vector_transfers(%A: memref) -> (vector<4x8xf32>, vector<4x9xf32>) { %c0 = constant 0 : index From e4a9e4ef554a90637c53d4f10326c262df69b76c Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Mon, 5 Oct 2020 22:45:35 +0000 Subject: [PATCH 168/321] [AMDGPU] Emit correct kernel descriptor on big-endian hosts Previously we wrote multi-byte values out as-is from host memory. Use the `emitIntN` helpers in `MCStreamer` to produce a valid descriptor irrespective of the host endianness. 
Reviewed By: arsenm, rochauha Differential Revision: https://reviews.llvm.org/D88858 --- .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 22 ++++++++++--------- llvm/test/MC/AMDGPU/hsa-sgpr-init-bug-v3.s | 3 --- llvm/test/MC/AMDGPU/hsa-v3.s | 3 --- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 344f22d412e73..9d662eca45a66 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -656,9 +656,10 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED); Streamer.emitLabel(KernelDescriptorSymbol); - Streamer.emitBytes(StringRef( - (const char*)&(KernelDescriptor), - offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset))); + Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size); + Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size); + for (uint8_t Res : KernelDescriptor.reserved0) + Streamer.emitInt8(Res); // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The // expression being created is: // (start of kernel code) - (start of kernel descriptor) @@ -670,11 +671,12 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context), Context), sizeof(KernelDescriptor.kernel_code_entry_byte_offset)); - Streamer.emitBytes(StringRef( - (const char*)&(KernelDescriptor) + - offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) + - sizeof(KernelDescriptor.kernel_code_entry_byte_offset), - sizeof(KernelDescriptor) - - offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) - - sizeof(KernelDescriptor.kernel_code_entry_byte_offset))); + for (uint8_t Res : KernelDescriptor.reserved1) + Streamer.emitInt8(Res); + Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3); + Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1); + Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2); + Streamer.emitInt16(KernelDescriptor.kernel_code_properties); + for (uint8_t Res : KernelDescriptor.reserved2) + Streamer.emitInt8(Res); } diff --git a/llvm/test/MC/AMDGPU/hsa-sgpr-init-bug-v3.s b/llvm/test/MC/AMDGPU/hsa-sgpr-init-bug-v3.s index 644d957d59ea1..35237c439ef95 100644 --- a/llvm/test/MC/AMDGPU/hsa-sgpr-init-bug-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-sgpr-init-bug-v3.s @@ -1,9 +1,6 @@ // RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj < %s > %t // RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s -// big endian not supported -// XFAIL: host-byteorder-big-endian - // Check that SGPR init bug on gfx803 is corrected by the assembler, setting // GRANULATED_WAVEFRONT_SGPR_COUNT to 11. 
diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s
index 3c2ddecefcc13..a995b1be16b65 100644
--- a/llvm/test/MC/AMDGPU/hsa-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-v3.s
@@ -3,9 +3,6 @@
 // RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
 // RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s

-// big endian not supported
-// XFAIL: host-byteorder-big-endian
-
 // READOBJ: Section Headers
 // READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
 // READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64

From c9f1c50fc092fa99ba3f527a7401205a59a73c45 Mon Sep 17 00:00:00 2001
From: Konrad Dobros
Date: Tue, 6 Oct 2020 13:33:41 -0400
Subject: [PATCH 169/321] [mlir][spirv] Fix extended insts deserialization generation

This change replaces the container used for storing temporary strings for
generated code with std::list. SmallVector may reallocate its internal
data, which invalidates references when more than one extended instruction
set is generated.

Reviewed By: mravishankar, antiagainst

Differential Revision: https://reviews.llvm.org/D88626
---
 mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
index 21f7349763db0..442afc14c2912 100644
--- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
+++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
@@ -27,6 +27,8 @@
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"

+#include <list>
+
 using llvm::ArrayRef;
 using llvm::formatv;
 using llvm::raw_ostream;
@@ -1031,7 +1033,7 @@ emitExtendedSetDeserializationDispatch(const RecordKeeper &recordKeeper,
   // raw_string_ostream needs a string&, use a vector to store all the string
   // that are captured by reference within raw_string_ostream.
StringMap extensionSets; - SmallVector extensionSetNames; + std::list extensionSetNames; initExtendedSetDeserializationDispatch(extensionSetName, instructionID, words, os); From a3adcba645eec31b42ad0a1f727975c5c9c236f0 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 6 Oct 2020 17:48:56 +0000 Subject: [PATCH 170/321] [mlir][Linalg] Implement tiling on tensors This revision implements tiling on tensors as described in: https://llvm.discourse.group/t/an-update-on-linalg-on-tensors/1878/4 Differential revision: https://reviews.llvm.org/D88733 --- .../Linalg/IR/LinalgStructuredOpsInterface.td | 2 +- .../Dialect/Linalg/Transforms/Transforms.h | 15 +- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 19 +- .../include/mlir/Dialect/StandardOps/IR/Ops.h | 2 +- mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 5 + mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 2 +- mlir/lib/Dialect/Linalg/Transforms/Loops.cpp | 4 +- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 197 +++++++++++------- .../Dialect/Linalg/Transforms/Transforms.cpp | 19 +- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 28 +-- mlir/test/Dialect/Linalg/tile-tensors.mlir | 28 +++ 11 files changed, 203 insertions(+), 118 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/tile-tensors.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index c10a1e4f4e046..614fd8d2a7de6 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -647,7 +647,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { res.reserve(nExtraOperands); for (unsigned i = 0; i < nExtraOperands; ++i) { res.push_back(getOperation()->getOperand(numShapedOperands + i)); - assert((res.back().getType().isSignlessIntOrIndexOrFloat() + assert((res.back().getType().isSignlessIntOrIndexOrFloat() || res.back().getType().isa()) && "expected scalar or vector type"); } diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index e47dafc9bf52b..2e566c941894f 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -29,6 +29,7 @@ using LinalgLoops = SmallVector; struct TiledLinalgOp { LinalgOp op; SmallVector loops; + SmallVector tensorResults; }; struct TiledAndFusedLinalgOps { @@ -371,8 +372,9 @@ struct LinalgBaseTilingPattern : public RewritePattern { LinalgTilingOptions options, LinalgMarker marker = LinalgMarker(), PatternBenefit benefit = 1); - LogicalResult matchAndRewrite(Operation *op, - PatternRewriter &rewriter) const override; + LogicalResult + matchAndRewriteBase(Operation *op, PatternRewriter &rewriter, + SmallVectorImpl &tensorResults) const; private: /// LinalgTransformMarker handles special attribute manipulations. 
@@ -390,9 +392,14 @@ struct LinalgTilingPattern : public LinalgBaseTilingPattern { marker, benefit) {} LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { - if (failed(LinalgBaseTilingPattern::matchAndRewrite(op, rewriter))) + SmallVector tensorResults; + if (failed(LinalgBaseTilingPattern::matchAndRewriteBase(op, rewriter, + tensorResults))) return failure(); - rewriter.eraseOp(op); + if (tensorResults.empty()) + rewriter.eraseOp(op); + else + rewriter.replaceOp(op, tensorResults); return success(); } }; diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index b4e5be58bad73..ffcac5f48aa4a 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -95,17 +95,17 @@ Operation *fuseTensorOps(PatternRewriter &rewriter, Operation *consumer, unsigned consumerIdx, OperationFolder *folder = nullptr); -/// Returns the linearized list of all view dimensions in a `linalgOp`. Applying -/// the inverse, concatenated loopToOperandRangeMaps to this list allows the -/// derivation of loop ranges for any linalgOp. -SmallVector getViewSizes(OpBuilder &builder, LinalgOp linalgOp); +/// Returns the linearized list of all shape dimensions in a `linalgOp`. +/// Applying the inverse, concatenated loopToOperandRangeMaps to this list +/// allows the derivation of loop ranges for any linalgOp. +SmallVector getShape(OpBuilder &builder, LinalgOp linalgOp); template -SmallVector getViewSizes(OpBuilder &builder, ConcreteOpTy linalgOp) { - return getViewSizes(builder, cast(linalgOp.getOperation())); +SmallVector getShape(OpBuilder &builder, ConcreteOpTy linalgOp) { + return getShape(builder, cast(linalgOp.getOperation())); } /// Returns the loop ranges of the `linalgOp`. Applies the inverse of the -/// concatenated indexing maps to the result of `getViewSizes`. Returns None if +/// concatenated indexing maps to the result of `getShape`. Returns None if /// the bounds computation fails. Optional> getLoopRanges(OpBuilder &builder, LinalgOp linalgOp, @@ -119,11 +119,6 @@ SmallVector applyMapToValues(OpBuilder &b, Location loc, AffineMap map, ValueRange values, OperationFolder *folder = nullptr); -/// Returns all the operands of `linalgOp` that are not views. -/// Asserts that these operands are value types to allow transformations like -/// tiling to just use the values when cloning `linalgOp`. -SmallVector getAssumedNonViewOperands(LinalgOp linalgOp); - /// Apply the permutation defined by `permutation` to `inVec`. /// Element `i` in `inVec` is mapped to location `j = permutation[i]`. /// E.g.: for an input vector `inVec = ['a', 'b', 'c']` and a permutation vector diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h index 409f54384aca5..747a83414a087 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h @@ -315,7 +315,7 @@ ParseResult parseDimAndSymbolList(OpAsmParser &parser, /// source memref. This is useful to to fold a memref_cast into a consuming op /// and implement canonicalization patterns for ops in different dialects that /// may consume the results of memref_cast operations. Such foldable memref_cast -/// operations are typically inserted as `view` and `subview` ops are +/// operations are typically inserted as `view` and `subview` ops and are /// canonicalized, to preserve the type compatibility of their uses. 
/// /// Returns true when all conditions are met: diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index bd45c8d667f99..abfc0001ed3e9 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -199,6 +199,11 @@ static bool isDimOpValidSymbol(DimOp dimOp, Region *region) { if (isTopLevelValue(dimOp.memrefOrTensor())) return true; + // Conservatively handle remaining BlockArguments as non-valid symbols. + // E.g. scf.for iterArgs. + if (dimOp.memrefOrTensor().isa()) + return false; + // The dim op is also okay if its operand memref/tensor is a view/subview // whose corresponding size is a valid symbol. Optional index = dimOp.getConstantIndex(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index 7b16a9197f116..585b8810fdc25 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -97,7 +97,7 @@ static LinalgOp cloneWithLoopRanges(OpBuilder &b, Location loc, LinalgOp op, clonedViews.push_back( b.create(loc, view, offsets, sizes, strides)); } - auto operands = getAssumedNonViewOperands(op); + auto operands = op.getAssumedNonShapedOperands(); clonedViews.append(operands.begin(), operands.end()); Operation *clonedOp = op.clone(b, loc, /*resultTypes*/ {}, clonedViews); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index a9e7a86602300..9e96c8cdc6919 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -508,10 +508,10 @@ Optional linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) { linalgOp.indexing_maps().template getAsRange(); auto maps = llvm::to_vector<8>( llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); })); - SmallVector sizes = getViewSizes(builder, linalgOp); + SmallVector sizes = getShape(builder, linalgOp); AffineMap map = concatAffineMaps(maps); auto loopRanges = emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), - map, getViewSizes(builder, linalgOp)); + map, getShape(builder, linalgOp)); SmallVector allIvs; GenerateLoopNest::doit( loopRanges, /*iterInitArgs*/ {}, linalgOp.iterator_types().getValue(), diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 3e8e0b74c1459..f7becae6e3282 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -56,18 +56,17 @@ using LoopIndexToRangeIndexMap = DenseMap; // indices of newly created loops. static std::tuple, LoopIndexToRangeIndexMap> makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map, - ArrayRef allViewSizes, - ArrayRef allTileSizes) { + ValueRange allShapeSizes, ValueRange allTileSizes) { assert(allTileSizes.size() == map.getNumResults()); - // Apply `map` to get view sizes in loop order. - auto viewSizes = applyMapToValues(b, loc, map, allViewSizes); + // Apply `map` to get shape sizes in loop order. + auto shapeSizes = applyMapToValues(b, loc, map, allShapeSizes); SmallVector tileSizes(allTileSizes.begin(), allTileSizes.end()); // Traverse the tile sizes, which are in loop order, erase zeros everywhere. 
LoopIndexToRangeIndexMap loopIndexToRangeIndex; for (int idx = 0, e = tileSizes.size(), zerosCount = 0; idx < e; ++idx) { if (isZero(tileSizes[idx - zerosCount])) { - viewSizes.erase(viewSizes.begin() + idx - zerosCount); + shapeSizes.erase(shapeSizes.begin() + idx - zerosCount); tileSizes.erase(tileSizes.begin() + idx - zerosCount); ++zerosCount; continue; @@ -78,10 +77,10 @@ makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map, // Create a new range with the applied tile sizes. SmallVector res; for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) - res.push_back(Range{std_constant_index(0), viewSizes[idx], tileSizes[idx]}); + res.push_back( + Range{std_constant_index(0), shapeSizes[idx], tileSizes[idx]}); return std::make_tuple(res, loopIndexToRangeIndex); } - namespace { // Helper visitor to determine whether an AffineExpr is tiled. @@ -93,7 +92,7 @@ namespace { // `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0] // struct TileCheck : public AffineExprVisitor { - TileCheck(ArrayRef tileSizes) : isTiled(false), tileSizes(tileSizes) {} + TileCheck(ValueRange tileSizes) : isTiled(false), tileSizes(tileSizes) {} void visitDimExpr(AffineDimExpr expr) { isTiled |= !isZero(tileSizes[expr.getPosition()]); @@ -106,7 +105,7 @@ struct TileCheck : public AffineExprVisitor { "nonpositive multiplying coefficient"); } bool isTiled; - ArrayRef tileSizes; + ValueRange tileSizes; }; } // namespace @@ -165,7 +164,6 @@ struct TileCheck : public AffineExprVisitor { static void transformIndexedGenericOpIndices( OpBuilder &b, LinalgOp op, SmallVectorImpl &ivs, const LoopIndexToRangeIndexMap &loopIndexToRangeIndex) { - assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); auto indexedGenericOp = dyn_cast(op.getOperation()); if (!indexedGenericOp) return; @@ -202,7 +200,7 @@ static void transformIndexedGenericOpIndices( } } -static bool isTiled(AffineExpr expr, ArrayRef tileSizes) { +static bool isTiled(AffineExpr expr, ValueRange tileSizes) { if (!expr) return false; TileCheck t(tileSizes); @@ -210,9 +208,8 @@ static bool isTiled(AffineExpr expr, ArrayRef tileSizes) { return t.isTiled; } -// Checks whether the view with index `viewIndex` within `linalgOp` varies with -// respect to a non-zero `tileSize`. -static bool isTiled(AffineMap map, ArrayRef tileSizes) { +// Checks whether the `map varies with respect to a non-zero `tileSize`. 
+static bool isTiled(AffineMap map, ValueRange tileSizes) { if (!map) return false; for (unsigned r = 0; r < map.getNumResults(); ++r) @@ -221,13 +218,11 @@ static bool isTiled(AffineMap map, ArrayRef tileSizes) { return false; } -static SmallVector makeTiledViews(OpBuilder &b, Location loc, - LinalgOp linalgOp, AffineMap map, - ArrayRef ivs, - ArrayRef tileSizes, - ArrayRef allViewSizes) { - assert(linalgOp.hasBufferSemantics() && - "expected linalg op with buffer semantics"); +static SmallVector +makeTiledShapes(OpBuilder &b, Location loc, LinalgOp linalgOp, + ValueRange operands, AffineMap map, ValueRange ivs, + ValueRange tileSizes, ValueRange allShapeSizes) { + assert(operands.size() == linalgOp.getShapedOperands().size()); assert(ivs.size() == static_cast(llvm::count_if( llvm::make_range(tileSizes.begin(), tileSizes.end()), [](Value v) { return !isZero(v); })) && @@ -235,37 +230,34 @@ static SmallVector makeTiledViews(OpBuilder &b, Location loc, using namespace edsc::op; - auto viewSizes = applyMapToValues(b, loc, map, allViewSizes); + auto shapeSizes = applyMapToValues(b, loc, map, allShapeSizes); // Construct (potentially temporary) mins and maxes on which to apply maps - // that define tile subviews. - SmallVector lbs, subViewSizes; + // that define tile subshapes. + SmallVector lbs, subShapeSizes; for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) { bool isTiled = !isZero(tileSizes[idx]); lbs.push_back(isTiled ? ivs[idxIvs++] : (Value)std_constant_index(0)); // Before composing, we need to make range a closed interval. - Value size = isTiled ? tileSizes[idx] : viewSizes[idx]; - subViewSizes.push_back(size - std_constant_index(1)); + Value size = isTiled ? tileSizes[idx] : shapeSizes[idx]; + subShapeSizes.push_back(size - std_constant_index(1)); } auto *op = linalgOp.getOperation(); SmallVector res; res.reserve(op->getNumOperands()); - auto viewIteratorBegin = linalgOp.getInputsAndOutputBuffers().begin(); - for (unsigned viewIndex = 0; viewIndex < linalgOp.getNumInputsAndOutputs(); - ++viewIndex) { - Value view = *(viewIteratorBegin + viewIndex); - auto viewType = view.getType().cast(); - unsigned rank = viewType.getRank(); - auto mapAttr = linalgOp.indexing_maps()[viewIndex]; - auto map = mapAttr.cast().getValue(); - // If the view is not tiled, we can use it as is. + for (auto en : llvm::enumerate(operands)) { + Value shapedOp = en.value(); + ShapedType shapedType = shapedOp.getType().cast(); + unsigned rank = shapedType.getRank(); + AffineMap map = linalgOp.getIndexingMap(en.index()); + // If the shape is not tiled, we can use it as is. if (!isTiled(map, tileSizes)) { - res.push_back(view); + res.push_back(shapedOp); continue; } - // Construct a new subview for the tile. + // Construct a new subview / subtensor for the tile. SmallVector offsets, sizes, strides; offsets.reserve(rank); sizes.reserve(rank); @@ -273,27 +265,27 @@ static SmallVector makeTiledViews(OpBuilder &b, Location loc, for (unsigned r = 0; r < rank; ++r) { if (!isTiled(map.getSubMap({r}), tileSizes)) { offsets.push_back(std_constant_index(0)); - sizes.push_back(std_dim(view, r)); + sizes.push_back(std_dim(shapedOp, r)); strides.push_back(std_constant_index(1)); continue; } // Tiling creates a new slice at the proper index, the slice step is 1 - // (i.e. the slice view does not subsample, stepping occurs in the loop). + // (i.e. the op does not subsample, stepping occurs in the loop). 
auto m = map.getSubMap({r}); auto offset = applyMapToValues(b, loc, m, lbs).front(); offsets.push_back(offset); - auto closedIntSize = applyMapToValues(b, loc, m, subViewSizes).front(); + auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front(); // Resulting size needs to be made half open interval again. auto size = closedIntSize + std_constant_index(1); - // The size of the subview should be trimmed to avoid out-of-bounds - // accesses, unless we statically know the subview size divides the view - // size evenly. - int64_t viewSize = viewType.getDimSize(r); + // The size of the subview / subtensor should be trimmed to avoid + // out-of-bounds accesses, unless we statically know the subshape size + // divides the shape size evenly. + int64_t shapeSize = shapedType.getDimSize(r); auto sizeCst = size.getDefiningOp(); - if (ShapedType::isDynamic(viewSize) || !sizeCst || - (viewSize % sizeCst.getValue()) != 0) { + if (ShapedType::isDynamic(shapeSize) || !sizeCst || + (shapeSize % sizeCst.getValue()) != 0) { // Compute min(size, dim - offset) to avoid out-of-bounds accesses. auto minMap = AffineMap::get( /*dimCount=*/3, /*symbolCount=*/0, @@ -301,7 +293,7 @@ static SmallVector makeTiledViews(OpBuilder &b, Location loc, getAffineDimExpr(/*position=*/1, b.getContext()) - getAffineDimExpr(/*position=*/2, b.getContext())}, b.getContext()); - auto d = std_dim(view, r); + auto d = std_dim(shapedOp, r); size = affine_min(b.getIndexType(), minMap, ValueRange{size, d, offset}); } @@ -310,7 +302,12 @@ static SmallVector makeTiledViews(OpBuilder &b, Location loc, strides.push_back(std_constant_index(1)); } - res.push_back(b.create(loc, view, offsets, sizes, strides)); + if (shapedType.isa()) + res.push_back( + b.create(loc, shapedOp, offsets, sizes, strides)); + else + res.push_back( + b.create(loc, shapedOp, offsets, sizes, strides)); } return res; @@ -318,7 +315,7 @@ static SmallVector makeTiledViews(OpBuilder &b, Location loc, template static Optional -tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef tileSizes, +tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes, const LinalgTilingOptions &options) { auto nLoops = op.getNumLoops(); // Initial tile sizes may be too big, only take the first nLoops. @@ -335,20 +332,20 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef tileSizes, } // 1. Build the tiled loop ranges. - auto allViewSizes = getViewSizes(b, op); + auto allShapeSizes = getShape(b, op); // The flattened loopToOperandRangesMaps is expected to be an invertible // permutation map (asserted in the inverse calculation). auto mapsRange = op.indexing_maps().getAsRange(); auto maps = llvm::to_vector<8>( llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); })); - auto viewSizesToLoopsMap = inversePermutation(concatAffineMaps(maps)); - if (!viewSizesToLoopsMap) + auto shapeSizesToLoopsMap = inversePermutation(concatAffineMaps(maps)); + if (!shapeSizesToLoopsMap) return llvm::None; SmallVector loopRanges; LoopIndexToRangeIndexMap loopIndexToRangeIndex; std::tie(loopRanges, loopIndexToRangeIndex) = makeTiledLoopRanges( - b, op.getLoc(), viewSizesToLoopsMap, allViewSizes, tileSizes); + b, op.getLoc(), shapeSizesToLoopsMap, allShapeSizes, tileSizes); SmallVector iteratorTypes; for (auto attr : enumerate(op.iterator_types().cast().getValue())) { @@ -380,29 +377,77 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef tileSizes, // 2. Create the tiled loops. 
LinalgOp res = op; - SmallVector ivs; + SmallVector ivs, tensorResults; + auto initTensors = op.getInitTensors(); GenerateLoopNest::doit( - loopRanges, /*iterArgInitValues*/ {}, iteratorTypes, + loopRanges, /*iterArgInitValues*/ initTensors, iteratorTypes, [&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector { auto &b = ScopedContext::getBuilderRef(); auto loc = ScopedContext::getLocation(); ivs.assign(localIvs.begin(), localIvs.end()); - SmallVector ivValues(ivs.begin(), ivs.end()); - // If we have to apply a permutation to the tiled loop nest, we have to - // reorder the induction variables This permutation is the right one - // assuming that loopRanges have previously been permuted by - // (i,j,k)->(k,i,j) So this permutation should be the inversePermutation - // of that one: (d0,d1,d2)->(d2,d0,d1) + // When an `interchangeVector` is present, it has been applied to the + // loop ranges and the iterator types. Apply its inverse to the + // resulting loop `ivs` to match the op definition. + SmallVector interchangedIvs; if (!options.interchangeVector.empty()) - ivValues = applyMapToValues(b, loc, invPermutationMap, ivValues); - - auto views = makeTiledViews(b, loc, op, viewSizesToLoopsMap, ivValues, - tileSizes, allViewSizes); - auto operands = getAssumedNonViewOperands(op); - views.append(operands.begin(), operands.end()); - res = op.clone(b, loc, /*resultTypes*/ {}, views); - return scf::ValueVector{}; + interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs); + else + interchangedIvs.assign(ivs.begin(), ivs.end()); + + assert(op.getNumInitTensors() == iterArgs.size() && + "num init tensors must match number of loop iter arguments"); + // This uses knowledge about position of the init tensor in the list + // of operands. + auto operands = llvm::to_vector<4>(op.getShapedOperands()); + std::copy(iterArgs.begin(), iterArgs.end(), + operands.begin() + op.getNumInputsAndOutputBuffers()); + + SmallVector tiledOperands = + makeTiledShapes(b, loc, op, operands, shapeSizesToLoopsMap, + interchangedIvs, tileSizes, allShapeSizes); + auto nonShapedOperands = op.getAssumedNonShapedOperands(); + tiledOperands.append(nonShapedOperands.begin(), + nonShapedOperands.end()); + + // If LinalgOp has results, they must all be tied to init tensors. + // We enforce this to ensure all tiled ops have been rewritten in + // "init tensor" form. This ensures tiling has anchor values into which + // to subtensor / subtensor_insert. Otherwise tiling would need to + // allocate which is not acceptable. + // This would not be the case with a special terminator op that + // generates the whole tensor (instead of inserting a subtensor). But + // the generator-based abstraction has other issues. + assert(op.getNumInitTensors() == op.getOperation()->getNumResults() && + "expected same number of init tensors as number of results"); + + // Handle init tensor operands. + // This uses knowledge about position of the init tensor in the list + // of operands. + // TODO: InterfaceAdaptor ? + SmallVector resultTensorTypes; + for (auto idx : llvm::seq(0, op.getNumInitTensors())) + resultTensorTypes.push_back( + tiledOperands[op.getNumInputsAndOutputBuffers() + idx].getType()); + + res = op.clone(b, loc, resultTensorTypes, tiledOperands); + + // Insert a subtensor_insert for each init subtensor. 
+ for (unsigned idx = 0, e = op.getNumInitTensors(); idx != e; ++idx) { + Value initTensor = + tiledOperands[op.getNumInputsAndOutputBuffers() + idx]; + if (auto subtensor = initTensor.getDefiningOp()) { + tensorResults.push_back(b.create( + loc, subtensor.source().getType(), + res.getOperation()->getResult(idx), subtensor.source(), + subtensor.offsets(), subtensor.sizes(), subtensor.strides(), + subtensor.static_offsets(), subtensor.static_sizes(), + subtensor.static_strides())); + } else { + tensorResults.push_back(res.getOperation()->getResult(idx)); + } + } + return scf::ValueVector(tensorResults.begin(), tensorResults.end()); }, options.distribution); @@ -422,7 +467,16 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef tileSizes, loops.push_back(nullptr); } } - return TiledLinalgOp{res, loops}; + + // 5. Get the tensor results from the outermost loop if available. Otherwise + // use the previously captured `tensorResults`. + Operation *outermostLoop = nullptr; + for (Operation *loop : loops) + if ((outermostLoop = loop)) + break; + + return TiledLinalgOp{ + res, loops, outermostLoop ? outermostLoop->getResults() : tensorResults}; } template @@ -432,7 +486,6 @@ Optional static tileLinalgOpImpl( b.setInsertionPoint(op); ScopedContext scope(b, op.getLoc()); - assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); // Enforce the convention that "tiling by zero" skips tiling a particular // dimension. This convention is significantly simpler to handle instead of // adjusting affine maps to account for missing dimensions. @@ -513,7 +566,9 @@ mlir::linalg::getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx) { scf::ForOp::getCanonicalizationPatterns(patterns, ctx); scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx); ConstantIndexOp::getCanonicalizationPatterns(patterns, ctx); + SubTensorOp::getCanonicalizationPatterns(patterns, ctx); SubViewOp::getCanonicalizationPatterns(patterns, ctx); + TensorCastOp::getCanonicalizationPatterns(patterns, ctx); ViewOp::getCanonicalizationPatterns(patterns, ctx); CanonicalizationPatternList< #define GET_OP_LIST diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 56652cbcb5277..71e3108b2b58c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -111,19 +111,34 @@ mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( : RewritePattern(opName, {}, benefit, context), marker(marker), options(options) {} -LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewrite( - Operation *op, PatternRewriter &rewriter) const { +LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewriteBase( + Operation *op, PatternRewriter &rewriter, + SmallVectorImpl &tensorResults) const { LinalgOp linalgOp = dyn_cast(op); if (!linalgOp) return failure(); if (failed(marker.checkAndNotify(rewriter, linalgOp))) return failure(); + // If LinalgOp has results, they must all be tied to init tensors. + // We enforce this to ensure all tiled ops have been rewritten in + // "init tensor" form. This ensures tiling has anchor values into which to + // subtensor / subtensor_insert. Otherwise tiling would need to allocate which + // is not acceptable. + // This would not be the case with a special terminator op that generates the + // whole tensor (instead of inserting a subtensor). But the generator-based + // abstraction has other issues. 
+ if (linalgOp.getNumInitTensors() != linalgOp.getOperation()->getNumResults()) + return failure(); + Optional res = tileLinalgOp(rewriter, linalgOp, options); if (!res) return failure(); + // Return relevant information to derived pattern. + tensorResults = res->tensorResults; + // New marker if specified. marker.replaceLinalgMarker(rewriter, res->op.getOperation()); return success(); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index f9ea9092d55dd..3f29949ffe631 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -85,26 +85,6 @@ SmallVector mlir::linalg::applyMapToValues(OpBuilder &b, Location loc, return res; } -/// Returns all the operands of `linalgOp` that are not views. -/// Asserts that these operands are value types to allow transformations like -/// tiling to just use the values when cloning `linalgOp`. -SmallVector -mlir::linalg::getAssumedNonViewOperands(LinalgOp linalgOp) { - auto *op = linalgOp.getOperation(); - unsigned numViews = linalgOp.getNumInputsAndOutputs(); - unsigned nOperands = op->getNumOperands() - numViews; - SmallVector res; - res.reserve(nOperands); - for (unsigned i = 0; i < nOperands; ++i) { - res.push_back(op->getOperand(numViews + i)); - auto t = res.back().getType(); - (void)t; - assert((t.isSignlessIntOrIndexOrFloat() || t.isa()) && - "expected scalar or vector type"); - } - return res; -} - bool mlir::linalg::isParallelIteratorType(Attribute attr) { if (auto strAttr = attr.dyn_cast()) { return strAttr.getValue() == getParallelIteratorTypeName(); @@ -147,12 +127,12 @@ namespace mlir { namespace linalg { /// Return the linearized list of all view dimensions in a linalgOp. -SmallVector getViewSizes(OpBuilder &builder, LinalgOp linalgOp) { +SmallVector getShape(OpBuilder &builder, LinalgOp linalgOp) { auto loc = linalgOp.getLoc(); SmallVector res; SmallVector ranks; - for (auto v : linalgOp.getInputsAndOutputBuffers()) { - MemRefType t = v.getType().template cast(); + for (Value v : linalgOp.getShapedOperands()) { + ShapedType t = v.getType().template cast(); ranks.push_back(t.getRank()); for (unsigned i = 0; i < t.getRank(); ++i) res.push_back(builder.create(loc, v, i)); @@ -181,7 +161,7 @@ SmallVector getViewSizes(OpBuilder &builder, LinalgOp linalgOp) { Optional> getLoopRanges(OpBuilder &builder, LinalgOp linalgOp, OperationFolder *folder) { - SmallVector viewSizes = getViewSizes(builder, linalgOp); + SmallVector viewSizes = getShape(builder, linalgOp); AffineMap invertedMap = inversePermutation(concatAffineMaps(linalgOp.getIndexingMaps())); if (!invertedMap) diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir new file mode 100644 index 0000000000000..b899cb3e00495 --- /dev/null +++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir @@ -0,0 +1,28 @@ +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" -mlir-disable-threading=true | FileCheck %s + +// CHECK-LABEL: func @matmul_tensors( +// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor +// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor +// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor) -> tensor { +func @matmul_tensors( + %arg0: tensor, %arg1: tensor, %arg2: tensor) + -> tensor { +// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor) { +// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor) { +// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = 
%[[TC1]]) -> (tensor) { +// CHECK: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor to tensor +// CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor to tensor +// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor to tensor +// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor) +// CHECK-SAME: init(%[[sTC]] : tensor) -> tensor +// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor +// CHECK: scf.yield %[[TD]] : tensor +// CHECK: scf.yield %[[TD2]] : tensor +// CHECK: scf.yield %[[TD1]] : tensor + %0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor) + init(%arg2: tensor) + -> tensor + +// CHECK: return %[[TD0]] : tensor + return %0 : tensor +} From 43cd0a98d1b1cbbbab38591badbe11a995844cf7 Mon Sep 17 00:00:00 2001 From: Fanbo Meng Date: Tue, 6 Oct 2020 14:19:18 -0400 Subject: [PATCH 171/321] [SystemZ][z/OS] Set default alignment rules for z/OS target Update RUN line to fix lit failure Differential Revision: https://reviews.llvm.org/D88845 --- clang/test/CodeGen/zos-alignment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/zos-alignment.c b/clang/test/CodeGen/zos-alignment.c index 5a72096a56f6a..9d7bfe8923d0c 100644 --- a/clang/test/CodeGen/zos-alignment.c +++ b/clang/test/CodeGen/zos-alignment.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -emit-llvm -triple s390x-none-zos -fdump-record-layouts %s -o - | FileCheck %s +// RUN: %clang_cc1 -emit-llvm-only -triple s390x-none-zos -fdump-record-layouts %s | FileCheck %s struct s0 { short a:3; From 0f8294072fbc75cc4f6d0e1c3e16f9849aae3771 Mon Sep 17 00:00:00 2001 From: Eric Schweitz Date: Tue, 6 Oct 2020 09:31:16 -0700 Subject: [PATCH 172/321] [NFC][flang] Add the header file Todo.h. This file is being upstreamed to satisfy dependencies and enable continued progress on lowering of OpenMP, OpenACC, etc. Differential Revision: https://reviews.llvm.org/D88909 --- flang/include/flang/Lower/Todo.h | 49 ++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 flang/include/flang/Lower/Todo.h diff --git a/flang/include/flang/Lower/Todo.h b/flang/include/flang/Lower/Todo.h new file mode 100644 index 0000000000000..4da24f305d94d --- /dev/null +++ b/flang/include/flang/Lower/Todo.h @@ -0,0 +1,49 @@ +//===-- Lower/Todo.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_LOWER_TODO_H +#define FORTRAN_LOWER_TODO_H + +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include + +// This is throw-away code used to mark areas of the code that have not yet been +// developed. + +#undef TODO + +#ifdef NDEBUG + +// In a release build, just give a message and exit. +#define TODO(ToDoMsg) \ + do { \ + llvm::errs() << __FILE__ << ':' << __LINE__ << ": not yet implemented " \ + << ToDoMsg << '\n'; \ + std::exit(1); \ + } while (false) + +#else + +#undef TODOQUOTE +#define TODOQUOTE(X) #X + +// In a developer build, print a message and give a backtrace. 
+#define TODO(ToDoMsg) \ + do { \ + llvm::report_fatal_error( \ + __FILE__ ":" TODOQUOTE(__LINE__) ": not yet implemented " ToDoMsg); \ + } while (false) + +#endif + +#endif // FORTRAN_LOWER_TODO_H From bf5c1d92d92ef8cee2adbfa17ecca20a8f65dc0e Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Tue, 6 Oct 2020 18:45:04 +0000 Subject: [PATCH 173/321] [AMDGPU] Fix remaining kernel descriptor test Follow up on e4a9e4ef554a to fix a test I missed in the original patch. Committed as obvious. --- llvm/test/MC/AMDGPU/hsa-gfx10-v3.s | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s index f6cc91af14076..34b207797bf93 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s @@ -3,9 +3,6 @@ // RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s // RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s -// big endian not supported -// XFAIL: host-byteorder-big-endian - // READOBJ: Section Headers // READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 // READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 From 6e557bc40507cbc5e331179b26f7ae5fe9624294 Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Tue, 6 Oct 2020 11:35:14 -0700 Subject: [PATCH 174/321] [mlir][spirv] Add Vector to SPIR-V conversion pass Add conversion pass for Vector dialect to SPIR-V dialect and add some simple conversion pattern for vector.broadcast, vector.insert, vector.extract. Differential Revision: https://reviews.llvm.org/D88761 --- mlir/include/mlir/Conversion/Passes.h | 1 + mlir/include/mlir/Conversion/Passes.td | 11 ++ .../VectorToSPIRV/ConvertVectorToSPIRV.h | 29 +++++ .../VectorToSPIRV/ConvertVectorToSPIRVPass.h | 25 ++++ .../mlir/Dialect/SPIRV/SPIRVCompositeOps.td | 5 + mlir/lib/Conversion/CMakeLists.txt | 1 + .../Conversion/VectorToSPIRV/CMakeLists.txt | 15 +++ .../VectorToSPIRV/VectorToSPIRV.cpp | 119 ++++++++++++++++++ mlir/lib/Dialect/SPIRV/SPIRVOps.cpp | 7 ++ .../test/Conversion/VectorToSPIRV/simple.mlir | 23 ++++ 10 files changed, 236 insertions(+) create mode 100644 mlir/include/mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRV.h create mode 100644 mlir/include/mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRVPass.h create mode 100644 mlir/lib/Conversion/VectorToSPIRV/CMakeLists.txt create mode 100644 mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp create mode 100644 mlir/test/Conversion/VectorToSPIRV/simple.mlir diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h index b04498598b290..b4418bb2e0ac2 100644 --- a/mlir/include/mlir/Conversion/Passes.h +++ b/mlir/include/mlir/Conversion/Passes.h @@ -29,6 +29,7 @@ #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" #include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h" #include "mlir/Conversion/VectorToSCF/VectorToSCF.h" +#include "mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRVPass.h" namespace mlir { diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 547b952b60b4b..36618384bb392 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -381,4 +381,15 @@ def ConvertVectorToROCDL : Pass<"convert-vector-to-rocdl", "ModuleOp"> { let dependentDialects = ["ROCDL::ROCDLDialect"]; } 
+//===----------------------------------------------------------------------===// +// VectorToSPIRV +//===----------------------------------------------------------------------===// + +def ConvertVectorToSPIRV : Pass<"convert-vector-to-spirv", "ModuleOp"> { + let summary = "Lower the operations from the vector dialect into the SPIR-V " + "dialect"; + let constructor = "mlir::createConvertVectorToSPIRVPass()"; + let dependentDialects = ["spirv::SPIRVDialect"]; +} + #endif // MLIR_CONVERSION_PASSES diff --git a/mlir/include/mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRV.h b/mlir/include/mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRV.h new file mode 100644 index 0000000000000..de664df83e83b --- /dev/null +++ b/mlir/include/mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRV.h @@ -0,0 +1,29 @@ +//=- ConvertVectorToSPIRV.h - Vector Ops to SPIR-V dialect patterns - C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Provides patterns for lowering Vector Ops to SPIR-V dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_INCLUDE_MLIR_CONVERSION_VECTORTOSPIRV_CONVERTVECTORTOSPIRV_H_ +#define MLIR_INCLUDE_MLIR_CONVERSION_VECTORTOSPIRV_CONVERTVECTORTOSPIRV_H_ + +#include "mlir/Transforms/DialectConversion.h" + +namespace mlir { +class SPIRVTypeConverter; + +/// Appends to a pattern list additional patterns for translating Vector Ops to +/// SPIR-V ops. +void populateVectorToSPIRVPatterns(MLIRContext *context, + SPIRVTypeConverter &typeConverter, + OwningRewritePatternList &patterns); + +} // namespace mlir + +#endif // MLIR_INCLUDE_MLIR_CONVERSION_VECTORTOSPIRV_CONVERTVECTORTOSPIRV_H_ diff --git a/mlir/include/mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRVPass.h b/mlir/include/mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRVPass.h new file mode 100644 index 0000000000000..7d4c7c1fb0259 --- /dev/null +++ b/mlir/include/mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRVPass.h @@ -0,0 +1,25 @@ +//=- ConvertVectorToSPIRVPass.h - Pass converting Vector to SPIRV -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Provides a pass to convert Vector ops to SPIR-V ops. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_VECTORTOSPIRV_CONVERTGPUTOSPIRVPASS_H +#define MLIR_CONVERSION_VECTORTOSPIRV_CONVERTGPUTOSPIRVPASS_H + +#include "mlir/Pass/Pass.h" + +namespace mlir { + +/// Pass to convert Vector Ops to SPIR-V ops. 
+std::unique_ptr> createConvertVectorToSPIRVPass(); + +} // namespace mlir + +#endif // MLIR_CONVERSION_VECTORTOSPIRV_CONVERTGPUTOSPIRVPASS_H diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVCompositeOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVCompositeOps.td index d6e66a6ee1a71..c3a867977b3ed 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVCompositeOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVCompositeOps.td @@ -161,6 +161,11 @@ def SPV_CompositeInsertOp : SPV_Op<"CompositeInsert", [NoSideEffect]> { let results = (outs SPV_Composite:$result ); + + let builders = [ + OpBuilder<[{OpBuilder &builder, OperationState &state, Value object, + Value composite, ArrayRef indices}]> + ]; } #endif // SPIRV_COMPOSITE_OPS diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index fe2af07b2a6a8..dbb9ed699798d 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -19,3 +19,4 @@ add_subdirectory(StandardToSPIRV) add_subdirectory(VectorToROCDL) add_subdirectory(VectorToLLVM) add_subdirectory(VectorToSCF) +add_subdirectory(VectorToSPIRV) diff --git a/mlir/lib/Conversion/VectorToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/VectorToSPIRV/CMakeLists.txt new file mode 100644 index 0000000000000..a6e73002de25f --- /dev/null +++ b/mlir/lib/Conversion/VectorToSPIRV/CMakeLists.txt @@ -0,0 +1,15 @@ +add_mlir_conversion_library(MLIRVectorToSPIRV + VectorToSPIRV.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/VectorToSPIRV + + DEPENDS + MLIRConversionPassIncGen + intrinsics_gen + + LINK_LIBS PUBLIC + MLIRSPIRV + MLIRVector + MLIRTransforms + ) diff --git a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp new file mode 100644 index 0000000000000..05949fb599104 --- /dev/null +++ b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp @@ -0,0 +1,119 @@ +//===------- VectorToSPIRV.cpp - Vector to SPIRV lowering passes ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass to generate SPIRV operations for Vector +// operations. 
+// +//===----------------------------------------------------------------------===// + +#include "../PassDetail.h" +#include "mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRV.h" +#include "mlir/Conversion/VectorToSPIRV/ConvertVectorToSPIRVPass.h" +#include "mlir/Dialect/SPIRV/SPIRVDialect.h" +#include "mlir/Dialect/SPIRV/SPIRVLowering.h" +#include "mlir/Dialect/SPIRV/SPIRVOps.h" +#include "mlir/Dialect/SPIRV/SPIRVTypes.h" +#include "mlir/Dialect/Vector/VectorOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +namespace { +struct VectorBroadcastConvert final + : public SPIRVOpLowering { + using SPIRVOpLowering::SPIRVOpLowering; + LogicalResult + matchAndRewrite(vector::BroadcastOp broadcastOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + if (broadcastOp.source().getType().isa() || + !spirv::CompositeType::isValid(broadcastOp.getVectorType())) + return failure(); + vector::BroadcastOp::Adaptor adaptor(operands); + SmallVector source(broadcastOp.getVectorType().getNumElements(), + adaptor.source()); + Value construct = rewriter.create( + broadcastOp.getLoc(), broadcastOp.getVectorType(), source); + rewriter.replaceOp(broadcastOp, construct); + return success(); + } +}; + +struct VectorExtractOpConvert final + : public SPIRVOpLowering { + using SPIRVOpLowering::SPIRVOpLowering; + LogicalResult + matchAndRewrite(vector::ExtractOp extractOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + if (extractOp.getType().isa() || + !spirv::CompositeType::isValid(extractOp.getVectorType())) + return failure(); + vector::ExtractOp::Adaptor adaptor(operands); + int32_t id = extractOp.position().begin()->cast().getInt(); + Value newExtract = rewriter.create( + extractOp.getLoc(), adaptor.vector(), id); + rewriter.replaceOp(extractOp, newExtract); + return success(); + } +}; + +struct VectorInsertOpConvert final : public SPIRVOpLowering { + using SPIRVOpLowering::SPIRVOpLowering; + LogicalResult + matchAndRewrite(vector::InsertOp insertOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + if (insertOp.getSourceType().isa() || + !spirv::CompositeType::isValid(insertOp.getDestVectorType())) + return failure(); + vector::InsertOp::Adaptor adaptor(operands); + int32_t id = insertOp.position().begin()->cast().getInt(); + Value newInsert = rewriter.create( + insertOp.getLoc(), adaptor.source(), adaptor.dest(), id); + rewriter.replaceOp(insertOp, newInsert); + return success(); + } +}; +} // namespace + +void mlir::populateVectorToSPIRVPatterns(MLIRContext *context, + SPIRVTypeConverter &typeConverter, + OwningRewritePatternList &patterns) { + patterns.insert(context, typeConverter); +} + +namespace { +struct LowerVectorToSPIRVPass + : public ConvertVectorToSPIRVBase { + void runOnOperation() override; +}; +} // namespace + +void LowerVectorToSPIRVPass::runOnOperation() { + MLIRContext *context = &getContext(); + ModuleOp module = getOperation(); + + auto targetAttr = spirv::lookupTargetEnvOrDefault(module); + std::unique_ptr target = + spirv::SPIRVConversionTarget::get(targetAttr); + + SPIRVTypeConverter typeConverter(targetAttr); + OwningRewritePatternList patterns; + populateVectorToSPIRVPatterns(context, typeConverter, patterns); + + target->addLegalOp(); + target->addLegalOp(); + + if (failed(applyFullConversion(module, *target, patterns))) + return signalPassFailure(); +} + +std::unique_ptr> +mlir::createConvertVectorToSPIRVPass() { + return std::make_unique(); 
+} diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index ad25ecb427a6c..c17490c05e6ba 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -1410,6 +1410,13 @@ static LogicalResult verify(spirv::CompositeExtractOp compExOp) { // spv.CompositeInsert //===----------------------------------------------------------------------===// +void spirv::CompositeInsertOp::build(OpBuilder &builder, OperationState &state, + Value object, Value composite, + ArrayRef indices) { + auto indexAttr = builder.getI32ArrayAttr(indices); + build(builder, state, composite.getType(), object, composite, indexAttr); +} + static ParseResult parseCompositeInsertOp(OpAsmParser &parser, OperationState &state) { SmallVector operands; diff --git a/mlir/test/Conversion/VectorToSPIRV/simple.mlir b/mlir/test/Conversion/VectorToSPIRV/simple.mlir new file mode 100644 index 0000000000000..34f1ef52c2379 --- /dev/null +++ b/mlir/test/Conversion/VectorToSPIRV/simple.mlir @@ -0,0 +1,23 @@ +// RUN: mlir-opt -split-input-file -convert-vector-to-spirv %s -o - | FileCheck %s + +// CHECK-LABEL: broadcast +// CHECK-SAME: %[[A:.*]]: f32 +// CHECK: spv.CompositeConstruct %[[A]], %[[A]], %[[A]], %[[A]] : vector<4xf32> +// CHECK: spv.CompositeConstruct %[[A]], %[[A]] : vector<2xf32> +func @broadcast(%arg0 : f32) { + %0 = vector.broadcast %arg0 : f32 to vector<4xf32> + %1 = vector.broadcast %arg0 : f32 to vector<2xf32> + spv.Return +} + +// ----- + +// CHECK-LABEL: extract_insert +// CHECK-SAME: %[[V:.*]]: vector<4xf32> +// CHECK: %[[S:.*]] = spv.CompositeExtract %[[V]][1 : i32] : vector<4xf32> +// CHECK: spv.CompositeInsert %[[S]], %[[V]][0 : i32] : f32 into vector<4xf32> +func @extract_insert(%arg0 : vector<4xf32>) { + %0 = vector.extract %arg0[1] : vector<4xf32> + %1 = vector.insert %0, %arg0[0] : f32 into vector<4xf32> + spv.Return +} From d85b845cb2550216b2a05c5dee451f423a4e4946 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 5 Oct 2020 16:49:29 -0700 Subject: [PATCH 175/321] [NFC][MC] Type uses of MCRegUnitIterator as MCRegister This is one of many subsequent similar changes. Note that we're ok with the parameter being typed as MCPhysReg, as MCPhysReg -> MCRegister is a correct conversion; Register -> MCRegister assumes the former is indeed physical, so we stop relying on the implicit conversion and use the explicit, value-asserting asMCReg(). Differential Revision: https://reviews.llvm.org/D88862 --- llvm/include/llvm/CodeGen/LiveIntervals.h | 2 +- llvm/include/llvm/CodeGen/TargetRegisterInfo.h | 6 +++--- llvm/lib/CodeGen/BreakFalseDeps.cpp | 2 +- llvm/lib/CodeGen/EarlyIfConversion.cpp | 10 ++++++---- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index 945a40829714b..7818aade21f99 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -423,7 +423,7 @@ class VirtRegMap; /// Reg. Subsequent uses should rely on on-demand recomputation. \note This /// method can result in inconsistent liveness tracking if multiple phyical /// registers share a regunit, and should be used cautiously. 
- void removeAllRegUnitsForPhysReg(unsigned Reg) { + void removeAllRegUnitsForPhysReg(MCRegister Reg) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) removeRegUnit(*Units); } diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index af6a5fa171a62..fc3e0ec6faa6e 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -386,12 +386,12 @@ class TargetRegisterInfo : public MCRegisterInfo { /// The registers may be virtual registers. bool regsOverlap(Register regA, Register regB) const { if (regA == regB) return true; - if (regA.isVirtual() || regB.isVirtual()) + if (!regA.isPhysical() || !regB.isPhysical()) return false; // Regunits are numerically ordered. Find a common unit. - MCRegUnitIterator RUA(regA, this); - MCRegUnitIterator RUB(regB, this); + MCRegUnitIterator RUA(regA.asMCReg(), this); + MCRegUnitIterator RUB(regB.asMCReg(), this); do { if (*RUA == *RUB) return true; if (*RUA < *RUB) ++RUA; diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index 071d44d61b168..69755358a71eb 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -118,7 +118,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, if (!MO.isRenamable()) return false; - Register OriginalReg = MO.getReg(); + MCRegister OriginalReg = MO.getReg().asMCReg(); // Update only undef operands that have reg units that are mapped to one root. for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index a580d3cc5785c..cf7d93d6a33a6 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -265,7 +265,8 @@ bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) { // Remember clobbered regunits. if (MO.isDef() && Register::isPhysicalRegister(Reg)) - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); + ++Units) ClobberedRegUnits.set(*Units); if (!MO.readsReg() || !Register::isVirtualRegister(Reg)) @@ -364,7 +365,7 @@ bool SSAIfConv::findInsertionPoint() { // Keep track of live regunits before the current position. // Only track RegUnits that are also in ClobberedRegUnits. LiveRegUnits.clear(); - SmallVector Reads; + SmallVector Reads; MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); MachineBasicBlock::iterator I = Head->end(); MachineBasicBlock::iterator B = Head->begin(); @@ -386,11 +387,12 @@ bool SSAIfConv::findInsertionPoint() { continue; // I clobbers Reg, so it isn't live before I. if (MO.isDef()) - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); + ++Units) LiveRegUnits.erase(*Units); // Unless I reads Reg. if (MO.readsReg()) - Reads.push_back(Reg); + Reads.push_back(Reg.asMCReg()); } // Anything read by I is live before I. while (!Reads.empty()) From 00d3e6c1b4d0b7879afc6002b721111b49ecf755 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 5 Oct 2020 17:52:23 -0700 Subject: [PATCH 176/321] [c++17] Implement P0145R3 during constant evaluation. Ensure that we evaluate assignment and compound-assignment right-to-left, and array subscripting left-to-right. Fixes PR47724. 
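For illustration (this snippet is not part of the patch), a minimal
standalone sketch of the sequencing rule being implemented: under C++17
(P0145R3) the right operand of a simple assignment is evaluated before
the left operand, and a conforming constant evaluator must respect that.
The helper names below are invented for the example.

  // Hypothetical sketch: rhs() must run before the lhs() lvalue under C++17.
  constexpr bool rhsRunsFirst() {
    int step = 0;
    int target = 0;
    auto rhs = [&]() -> int {
      step = (step == 0) ? 1 : -1; // -1 records a sequencing violation
      return 42;
    };
    auto lhs = [&]() -> int & {
      if (step != 1)
        step = -1;
      return target;
    };
    lhs() = rhs(); // P0145R3: evaluate the RHS, then the LHS
    return step == 1 && target == 42;
  }
  static_assert(rhsRunsFirst(), "assignment must evaluate right-to-left");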
This is a re-commit of ded79be, reverted in 37c74df, with a fix and test for the crasher bug previously introduced. --- clang/lib/AST/ExprConstant.cpp | 113 ++++++++++------- .../SemaCXX/constant-expression-cxx1z.cpp | 118 ++++++++++++++++++ clang/www/cxx_status.html | 1 + 3 files changed, 189 insertions(+), 43 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 4460e3a17e6da..639a5733b34b8 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1856,8 +1856,12 @@ void CallStackFrame::describe(raw_ostream &Out) { Out << ", "; const ParmVarDecl *Param = *I; - const APValue &Arg = Arguments[ArgIndex]; - Arg.printPretty(Out, Info.Ctx, Param->getType()); + if (Arguments) { + const APValue &Arg = Arguments[ArgIndex]; + Arg.printPretty(Out, Info.Ctx, Param->getType()); + } else { + Out << "<...>"; + } if (ArgIndex == 0 && IsMemberCall) Out << "->" << *Callee << '('; @@ -5792,6 +5796,8 @@ typedef SmallVector ArgVector; /// EvaluateArgs - Evaluate the arguments to a function call. static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, EvalInfo &Info, const FunctionDecl *Callee) { + ArgValues.resize(Args.size()); + bool Success = true; llvm::SmallBitVector ForbiddenNullArgs; if (Callee->hasAttr()) { @@ -5809,8 +5815,6 @@ static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, } } } - // FIXME: This is the wrong evaluation order for an assignment operator - // called via operator syntax. for (unsigned Idx = 0; Idx < Args.size(); Idx++) { if (!Evaluate(ArgValues[Idx], Info, Args[Idx])) { // If we're checking for a potential constant expression, evaluate all @@ -5834,17 +5838,13 @@ static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, /// Evaluate a function call. static bool HandleFunctionCall(SourceLocation CallLoc, const FunctionDecl *Callee, const LValue *This, - ArrayRef Args, const Stmt *Body, - EvalInfo &Info, APValue &Result, - const LValue *ResultSlot) { - ArgVector ArgValues(Args.size()); - if (!EvaluateArgs(Args, ArgValues, Info, Callee)) - return false; - + ArrayRef Args, APValue *ArgValues, + const Stmt *Body, EvalInfo &Info, + APValue &Result, const LValue *ResultSlot) { if (!Info.CheckCallLimit(CallLoc)) return false; - CallStackFrame Frame(Info, CallLoc, Callee, This, ArgValues.data()); + CallStackFrame Frame(Info, CallLoc, Callee, This, ArgValues); // For a trivial copy or move assignment, perform an APValue copy. This is // essential for unions, where the operations performed by the assignment @@ -7293,6 +7293,8 @@ class ExprEvaluatorBase auto Args = llvm::makeArrayRef(E->getArgs(), E->getNumArgs()); bool HasQualifier = false; + ArgVector ArgValues; + // Extract function decl and 'this' pointer from the callee. if (CalleeType->isSpecificBuiltinType(BuiltinType::BoundMember)) { const CXXMethodDecl *Member = nullptr; @@ -7341,6 +7343,22 @@ class ExprEvaluatorBase return Error(E); } + // For an (overloaded) assignment expression, evaluate the RHS before the + // LHS. + auto *OCE = dyn_cast(E); + if (OCE && OCE->isAssignmentOp()) { + assert(Args.size() == 2 && "wrong number of arguments in assignment"); + if (isa(FD)) { + // Args[0] is the object argument. + if (!EvaluateArgs({Args[1]}, ArgValues, Info, FD)) + return false; + } else { + if (!EvaluateArgs({Args[1], Args[0]}, ArgValues, Info, FD)) + return false; + std::swap(ArgValues[0], ArgValues[1]); + } + } + // Overloaded operator calls to member functions are represented as normal // calls with '*this' as the first argument. 
const CXXMethodDecl *MD = dyn_cast(FD); @@ -7403,6 +7421,11 @@ class ExprEvaluatorBase } else return Error(E); + // Evaluate the arguments now if we've not already done so. + if (ArgValues.empty() && !Args.empty() && + !EvaluateArgs(Args, ArgValues, Info, FD)) + return false; + SmallVector CovariantAdjustmentPath; if (This) { auto *NamedMember = dyn_cast(FD); @@ -7424,6 +7447,7 @@ class ExprEvaluatorBase // Destructor calls are different enough that they have their own codepath. if (auto *DD = dyn_cast(FD)) { assert(This && "no 'this' pointer for destructor call"); + assert(ArgValues.empty() && "unexpected destructor arguments"); return HandleDestruction(Info, E, *This, Info.Ctx.getRecordType(DD->getParent())); } @@ -7432,8 +7456,8 @@ class ExprEvaluatorBase Stmt *Body = FD->getBody(Definition); if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body) || - !HandleFunctionCall(E->getExprLoc(), Definition, This, Args, Body, Info, - Result, ResultSlot)) + !HandleFunctionCall(E->getExprLoc(), Definition, This, Args, + ArgValues.data(), Body, Info, Result, ResultSlot)) return false; if (!CovariantAdjustmentPath.empty() && @@ -8071,16 +8095,19 @@ bool LValueExprEvaluator::VisitArraySubscriptExpr(const ArraySubscriptExpr *E) { if (E->getBase()->getType()->isVectorType()) return Error(E); + APSInt Index; bool Success = true; - if (!evaluatePointer(E->getBase(), Result)) { - if (!Info.noteFailure()) - return false; - Success = false; - } - APSInt Index; - if (!EvaluateInteger(E->getIdx(), Index, Info)) - return false; + // C++17's rules require us to evaluate the LHS first, regardless of which + // side is the base. + for (const Expr *SubExpr : {E->getLHS(), E->getRHS()}) { + if (SubExpr == E->getBase() ? !evaluatePointer(SubExpr, Result) + : !EvaluateInteger(SubExpr, Index, Info)) { + if (!Info.noteFailure()) + return false; + Success = false; + } + } return Success && HandleLValueArrayAdjustment(Info, E, Result, E->getType(), Index); @@ -8125,16 +8152,18 @@ bool LValueExprEvaluator::VisitCompoundAssignOperator( if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(CAO); - APValue RHS; + bool Success = true; - // The overall lvalue result is the result of evaluating the LHS. - if (!this->Visit(CAO->getLHS())) { - if (Info.noteFailure()) - Evaluate(RHS, this->Info, CAO->getRHS()); - return false; + // C++17 onwards require that we evaluate the RHS first. + APValue RHS; + if (!Evaluate(RHS, this->Info, CAO->getRHS())) { + if (!Info.noteFailure()) + return false; + Success = false; } - if (!Evaluate(RHS, this->Info, CAO->getRHS())) + // The overall lvalue result is the result of evaluating the LHS. + if (!this->Visit(CAO->getLHS()) || !Success) return false; return handleCompoundAssignment( @@ -8147,15 +8176,17 @@ bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(E); - APValue NewVal; + bool Success = true; - if (!this->Visit(E->getLHS())) { - if (Info.noteFailure()) - Evaluate(NewVal, this->Info, E->getRHS()); - return false; + // C++17 onwards require that we evaluate the RHS first. 
+ APValue NewVal; + if (!Evaluate(NewVal, this->Info, E->getRHS())) { + if (!Info.noteFailure()) + return false; + Success = false; } - if (!Evaluate(NewVal, this->Info, E->getRHS())) + if (!this->Visit(E->getLHS()) || !Success) return false; if (Info.getLangOpts().CPlusPlus20 && @@ -15270,7 +15301,8 @@ bool Expr::isPotentialConstantExpr(const FunctionDecl *FD, } else { SourceLocation Loc = FD->getLocation(); HandleFunctionCall(Loc, FD, (MD && MD->isInstance()) ? &This : nullptr, - Args, FD->getBody(), Info, Scratch, nullptr); + Args, /*ArgValues*/ nullptr, FD->getBody(), Info, + Scratch, nullptr); } return Diags.empty(); @@ -15292,13 +15324,8 @@ bool Expr::isPotentialConstantExprUnevaluated(Expr *E, Info.CheckingPotentialConstantExpression = true; // Fabricate a call stack frame to give the arguments a plausible cover story. - ArrayRef Args; - ArgVector ArgValues(0); - bool Success = EvaluateArgs(Args, ArgValues, Info, FD); - (void)Success; - assert(Success && - "Failed to set up arguments for potential constant evaluation"); - CallStackFrame Frame(Info, SourceLocation(), FD, nullptr, ArgValues.data()); + CallStackFrame Frame(Info, SourceLocation(), FD, /*This*/ nullptr, + /*ArgValues*/ nullptr); APValue ResultScratch; Evaluate(ResultScratch, Info, E); diff --git a/clang/test/SemaCXX/constant-expression-cxx1z.cpp b/clang/test/SemaCXX/constant-expression-cxx1z.cpp index 2b366adf2e914..9335626a5c90a 100644 --- a/clang/test/SemaCXX/constant-expression-cxx1z.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx1z.cpp @@ -59,3 +59,121 @@ void test() { else if constexpr (v) {} } } + +// Check that assignment operators evaluate their operands right-to-left. +namespace EvalOrder { + template struct lvalue { + T t; + constexpr T &get() { return t; } + }; + + struct UserDefined { + int n = 0; + constexpr UserDefined &operator=(const UserDefined&) { return *this; } + constexpr UserDefined &operator+=(const UserDefined&) { return *this; } + constexpr void operator<<(const UserDefined&) const {} + constexpr void operator>>(const UserDefined&) const {} + constexpr void operator+(const UserDefined&) const {} + constexpr void operator[](int) const {} + }; + constexpr UserDefined ud; + + struct NonMember {}; + constexpr void operator+=(NonMember, NonMember) {} + constexpr void operator<<(NonMember, NonMember) {} + constexpr void operator>>(NonMember, NonMember) {} + constexpr void operator+(NonMember, NonMember) {} + constexpr NonMember nm; + + constexpr void f(...) {} + + // Helper to ensure that 'a' is evaluated before 'b'. + struct seq_checker { + bool done_a = false; + bool done_b = false; + + template constexpr T &&a(T &&v) { + done_a = true; + return (T &&)v; + } + template constexpr T &&b(T &&v) { + if (!done_a) + throw "wrong"; + done_b = true; + return (T &&)v; + } + + constexpr bool ok() { return done_a && done_b; } + }; + + // SEQ(expr), where part of the expression is tagged A(...) and part is + // tagged B(...), checks that A is evaluated before B. + #define A sc.a + #define B sc.b + #define SEQ(...) static_assert([](seq_checker sc) { void(__VA_ARGS__); return sc.ok(); }({})) + + // Longstanding sequencing rules. + SEQ((A(1), B(2))); + SEQ((A(true) ? B(2) : throw "huh?")); + SEQ((A(false) ? throw "huh?" : B(2))); + SEQ(A(true) && B(true)); + SEQ(A(false) || B(true)); + + // From P0145R3: + + // Rules 1 and 2 have no effect ('b' is not an expression). 
+ + // Rule 3: a->*b + SEQ(A(ud).*B(&UserDefined::n)); + SEQ(A(&ud)->*B(&UserDefined::n)); + + // Rule 4: a(b1, b2, b3) + SEQ(A(f)(B(1), B(2), B(3))); + + // Rule 5: b = a, b @= a + SEQ(B(lvalue().get()) = A(0)); + SEQ(B(lvalue().get()) = A(ud)); + SEQ(B(lvalue().get()) += A(0)); + SEQ(B(lvalue().get()) += A(ud)); + SEQ(B(lvalue().get()) += A(nm)); + + // Rule 6: a[b] + constexpr int arr[3] = {}; + SEQ(A(arr)[B(0)]); + SEQ(A(+arr)[B(0)]); + SEQ(A(0)[B(arr)]); + SEQ(A(0)[B(+arr)]); + SEQ(A(ud)[B(0)]); + + // Rule 7: a << b + SEQ(A(1) << B(2)); + SEQ(A(ud) << B(ud)); + SEQ(A(nm) << B(nm)); + + // Rule 8: a >> b + SEQ(A(1) >> B(2)); + SEQ(A(ud) >> B(ud)); + SEQ(A(nm) >> B(nm)); + + // No particular order of evaluation is specified in other cases, but we in + // practice evaluate left-to-right. + // FIXME: Technically we're expected to check for undefined behavior due to + // unsequenced read and modification and treat it as non-constant due to UB. + SEQ(A(1) + B(2)); + SEQ(A(ud) + B(ud)); + SEQ(A(nm) + B(nm)); + SEQ(f(A(1), B(2))); + + #undef SEQ + #undef A + #undef B +} + +namespace LambdaCallOp { + constexpr void get_lambda(void (*&p)()) { p = []{}; } + constexpr void call_lambda() { + void (*p)() = nullptr; + get_lambda(p); + p(); + } +} diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 3c546eb409dee..9c39e396edd48 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -807,6 +807,7 @@

C++17 implementation status

operator&&, operator||, and operator, functions using expression syntax are no longer guaranteed to be destroyed in reverse construction order in that ABI. +This is not fully supported during constant expression evaluation until Clang 12.

(10): Despite being the resolution to a Defect Report, this feature is disabled by default in all language versions, and can be enabled From 370b7887e5af413e06cb0bdb23f502ca7a56280d Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 6 Oct 2020 14:13:55 -0400 Subject: [PATCH 177/321] [libc++] Add a script to setup CI on macOS nodes --- libcxx/utils/ci/macos-ci-setup | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100755 libcxx/utils/ci/macos-ci-setup diff --git a/libcxx/utils/ci/macos-ci-setup b/libcxx/utils/ci/macos-ci-setup new file mode 100755 index 0000000000000..d6e9c8aa3f65f --- /dev/null +++ b/libcxx/utils/ci/macos-ci-setup @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +# This simple script can be used to set up a CI node running MacOS. +# An additional requirement that is *not* handled by this script is the +# installation of Xcode, which requires manual intervention. + +source secrets.env + +# Install Homebrew +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)" + +# Install the required tools to run CI +brew install sphinx-doc python3 buildkite-agent ninja cmake + +CFG_DIR="$(brew --prefix)/etc/buildkite-agent" + +# Setup the CONDUIT_TOKEN in the BuildKite Agent, and install the Phabricator +# Python module to allow reporting results back to Phabricator. +mv "${CFG_DIR}/hooks/environment.sample" "${CFG_DIR}/hooks/environment" +echo "export CONDUIT_TOKEN=${CONDUIT_TOKEN}" >> "${CFG_DIR}/hooks/environment" +pip3 install --user phabricator + +# Setup the tags of the agent +echo 'tags="queue=libcxx-macos-builders"' >> "${CFG_DIR}/buildkite-agent.cfg" + +# Setup the BuildKite Agent token +sed -i '' "s/xxx/${BUILDKITE_AGENT_TOKEN}/g" "${CFG_DIR}/buildkite-agent.cfg" From d3d790fc9827301a49c7bbab3c1eb22da48085c6 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Tue, 6 Oct 2020 15:07:42 -0400 Subject: [PATCH 178/321] Revert [lit] Support running tests on Windows without GnuWin32 This reverts b3418cb4eb1456c41606f4621dcfa362fe54183c and d12ae042e17b27ebc8d2b5ae3d8dd5f88384d093 This breaks some external bots, see discussion in https://reviews.llvm.org/D84380 In the meanwhile, please use `cmake -DLLVM_LIT_TOOLS_DIR="C:/Program Files/Git/usr/bin"` or add it to %PATH%. --- llvm/utils/lit/lit/llvm/config.py | 49 +++++-------------------------- 1 file changed, 8 insertions(+), 41 deletions(-) diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index c8013945e3f93..e9fd75e0a5fad 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -1,4 +1,3 @@ -import itertools import os import platform import re @@ -21,16 +20,13 @@ def __init__(self, lit_config, config): self.use_lit_shell = False # Tweak PATH for Win32 to decide to use bash.exe or not. if sys.platform == 'win32': - # Seek necessary tools in directories and set to $PATH. - path = None - lit_tools_dir = getattr(config, 'lit_tools_dir', None) - required_tools = ['cmp.exe', 'grep.exe', 'sed.exe', 'diff.exe', 'echo.exe'] - if lit_tools_dir: - path = self.lit_config.getToolsPath(lit_tools_dir, - config.environment['PATH'], - required_tools) - if path is None: - path = self._find_git_windows_unix_tools(required_tools) + # For tests that require Windows to run. + features.add('system-windows') + + # Seek sane tools in directories and set to $PATH. 
+ path = self.lit_config.getToolsPath(config.lit_tools_dir, + config.environment['PATH'], + ['cmp.exe', 'grep.exe', 'sed.exe']) if path is not None: self.with_environment('PATH', path, append_path=True) # Many tools behave strangely if these environment variables aren't set. @@ -121,35 +117,6 @@ def __init__(self, lit_config, config): self.with_environment( 'DYLD_INSERT_LIBRARIES', gmalloc_path_str) - def _find_git_windows_unix_tools(self, tools_needed): - assert(sys.platform == 'win32') - if sys.version_info.major >= 3: - import winreg - else: - import _winreg as winreg - - # Search both the 64 and 32-bit hives, as well as HKLM + HKCU - masks = [0, winreg.KEY_WOW64_64KEY] - hives = [winreg.HKEY_LOCAL_MACHINE, winreg.HKEY_CURRENT_USER] - for mask, hive in itertools.product(masks, hives): - try: - with winreg.OpenKey(hive, r"SOFTWARE\GitForWindows", 0, - winreg.KEY_READ | mask) as key: - install_root, _ = winreg.QueryValueEx(key, 'InstallPath') - - if not install_root: - continue - candidate_path = os.path.join(install_root, 'usr', 'bin') - if not lit.util.checkToolsPath(candidate_path, tools_needed): - continue - - # We found it, stop enumerating. - return lit.util.to_string(candidate_path) - except: - continue - - return None - def with_environment(self, variable, value, append_path=False): if append_path: # For paths, we should be able to take a list of them and process all @@ -169,7 +136,7 @@ def norm(x): paths = [] # If we are passed a list [a b c], then iterating this list forwards - # and adding each to the beginning would result in c b a. So we + # and adding each to the beginning would result in b c a. So we # need to iterate in reverse to end up with the original ordering. for p in reversed(paths_to_add): # Move it to the front if it already exists, otherwise insert it at the From 5a305f81bfc3cb71f1f77f15d08dd62b32865e8a Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 6 Oct 2020 20:11:39 +0000 Subject: [PATCH 179/321] Remove unneeded "allow-unregistered-dialect" from shape-type-conversion.mlir test (NFC) --- mlir/test/Dialect/Shape/shape-type-conversion.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Dialect/Shape/shape-type-conversion.mlir b/mlir/test/Dialect/Shape/shape-type-conversion.mlir index 8985a6da02510..52bc6658a8212 100644 --- a/mlir/test/Dialect/Shape/shape-type-conversion.mlir +++ b/mlir/test/Dialect/Shape/shape-type-conversion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -shape-tensor-to-memref <%s | FileCheck %s +// RUN: mlir-opt -split-input-file -shape-tensor-to-memref <%s | FileCheck %s // ----- // Check that shape.assuming returns a memref. From c6c67f643dcff142b26a53059e63e5369e6d8d89 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Tue, 6 Oct 2020 12:15:36 -0700 Subject: [PATCH 180/321] [mlir] [sparse] convenience runtime support to read Matrix Market format Setting up input data for benchmarks and integration tests can be tedious in pure MLIR. With more sparse tensor work planned, this convenience library simplifies reading sparse matrices in the popular Matrix Market Exchange Format (see https://math.nist.gov/MatrixMarket). Note that this library is *not* part of core MLIR. It is merely intended as a convenience library for benchmarking and integration testing. 
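For readers who want to drive the new runtime support library directly from
C++ rather than from MLIR, a minimal sketch follows (not part of the patch).
It declares and calls the exported C API added below; collapsing each
memref-style (base, data, offset) triple by passing the same pointer twice is
a simplification assumed here for brevity, since the implementation does not
use the base pointers, and the filename "test.mtx" is a placeholder.

  #include <cstdint>
  #include <cstdio>

  extern "C" void openMatrix(char *, uint64_t *, uint64_t *, int64_t,
                             uint64_t *, uint64_t *, int64_t,
                             uint64_t *, uint64_t *, int64_t);
  extern "C" void readMatrixItem(uint64_t *, uint64_t *, int64_t,
                                 uint64_t *, uint64_t *, int64_t,
                                 double *, double *, int64_t);
  extern "C" void closeMatrix();

  int main() {
    uint64_t m = 0, n = 0, nnz = 0;
    char filename[] = "test.mtx";
    // Read the header: the matrix is m x n with nnz nonzero elements.
    openMatrix(filename, &m, &m, 0, &n, &n, 0, &nnz, &nnz, 0);
    for (uint64_t k = 0; k < nnz; ++k) {
      uint64_t i = 0, j = 0;
      double d = 0.0;
      // Each call yields the next nonzero element A[i][j] = d.
      readMatrixItem(&i, &i, 0, &j, &j, 0, &d, &d, 0);
      std::printf("A[%llu][%llu] = %g\n", (unsigned long long)i,
                  (unsigned long long)j, d);
    }
    closeMatrix();
    return 0;
  }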
Reviewed By: penpornk Differential Revision: https://reviews.llvm.org/D88856 --- mlir/integration_test/CMakeLists.txt | 4 + .../integration_test/Sparse/CPU/lit.local.cfg | 5 + .../Sparse/CPU/matrix-market-example.mlir | 100 ++++++++++ mlir/integration_test/data/test.mtx | 15 ++ mlir/lib/ExecutionEngine/CMakeLists.txt | 3 + mlir/lib/ExecutionEngine/SparseUtils.cpp | 172 ++++++++++++++++++ 6 files changed, 299 insertions(+) create mode 100644 mlir/integration_test/Sparse/CPU/lit.local.cfg create mode 100644 mlir/integration_test/Sparse/CPU/matrix-market-example.mlir create mode 100644 mlir/integration_test/data/test.mtx create mode 100644 mlir/lib/ExecutionEngine/SparseUtils.cpp diff --git a/mlir/integration_test/CMakeLists.txt b/mlir/integration_test/CMakeLists.txt index 8201cd79e37fd..bc5ad90e12537 100644 --- a/mlir/integration_test/CMakeLists.txt +++ b/mlir/integration_test/CMakeLists.txt @@ -28,3 +28,7 @@ add_dependencies(check-mlir check-mlir-integration) add_lit_testsuites(MLIR_INTEGRATION ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${MLIR_INTEGRATION_TEST_DEPENDS} ) + +# Copy test data over. +file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/test.mtx + DESTINATION ${MLIR_INTEGRATION_TEST_DIR}/data/) diff --git a/mlir/integration_test/Sparse/CPU/lit.local.cfg b/mlir/integration_test/Sparse/CPU/lit.local.cfg new file mode 100644 index 0000000000000..83247d7e37449 --- /dev/null +++ b/mlir/integration_test/Sparse/CPU/lit.local.cfg @@ -0,0 +1,5 @@ +import sys + +# No JIT on win32. +if sys.platform == 'win32': + config.unsupported = True diff --git a/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir b/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir new file mode 100644 index 0000000000000..31fb20fa11ccf --- /dev/null +++ b/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir @@ -0,0 +1,100 @@ +// RUN: mlir-opt %s \ +// RUN: -convert-scf-to-std -convert-vector-to-scf \ +// RUN: -convert-linalg-to-llvm -convert-vector-to-llvm | \ +// RUN: SPARSE_MATRIX0="%mlir_integration_test_dir/data/test.mtx" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +module { + func @openMatrix(!llvm.ptr, memref, memref, memref) -> () + func @readMatrixItem(memref, memref, memref) -> () + func @closeMatrix() -> () + func @getSparseMatrix(index) -> (!llvm.ptr) + + func @entry() { + %d0 = constant 0.0 : f64 + %c0 = constant 0 : index + %c1 = constant 1 : index + %c5 = constant 5 : index + %m = alloc() : memref + %n = alloc() : memref + %nnz = alloc() : memref + %i = alloc() : memref + %j = alloc() : memref + %d = alloc() : memref + + // + // Read the header of a sparse matrix. This yields the + // size (m x n) and number of nonzero elements (nnz). + // + %file = call @getSparseMatrix(%c0) : (index) -> (!llvm.ptr) + call @openMatrix(%file, %m, %n, %nnz) + : (!llvm.ptr, memref, + memref, memref) -> () + %M = load %m[] : memref + %N = load %n[] : memref + %Z = load %nnz[] : memref + + // + // At this point, code should prepare a proper sparse storage + // scheme for an m x n matrix with nnz nonzero elements. For + // simplicity, however, here we simply set up a dense matrix. + // + %a = alloc(%M, %N) : memref + scf.for %ii = %c0 to %M step %c1 { + scf.for %jj = %c0 to %N step %c1 { + store %d0, %a[%ii, %jj] : memref + } + } + + // + // Now we are ready to read in the nonzero elements of the + // sparse matrix and insert these into a sparse storage + // scheme. 
In this example, we simply insert them in the + // dense matrix. + // + scf.for %k = %c0 to %Z step %c1 { + call @readMatrixItem(%i, %j, %d) + : (memref, memref, memref) -> () + %I = load %i[] : memref + %J = load %j[] : memref + %D = load %d[] : memref + store %D, %a[%I, %J] : memref + } + call @closeMatrix() : () -> () + + // + // Verify that the results are as expected. + // + %A = vector.transfer_read %a[%c0, %c0], %d0 : memref, vector<5x5xf64> + vector.print %M : index + vector.print %N : index + vector.print %Z : index + vector.print %A : vector<5x5xf64> + // + // CHECK: 5 + // CHECK: 5 + // CHECK: 9 + // + // CHECK: ( ( 1, 0, 0, 1.4, 0 ), + // CHECK-SAME: ( 0, 2, 0, 0, 2.5 ), + // CHECK-SAME: ( 0, 0, 3, 0, 0 ), + // CHECK-SAME: ( 4.1, 0, 0, 4, 0 ), + // CHECK-SAME: ( 0, 5.2, 0, 0, 5 ) ) + + // + // Free. + // + dealloc %m : memref + dealloc %n : memref + dealloc %nnz : memref + dealloc %i : memref + dealloc %j : memref + dealloc %d : memref + dealloc %a : memref + + return + } +} diff --git a/mlir/integration_test/data/test.mtx b/mlir/integration_test/data/test.mtx new file mode 100644 index 0000000000000..13a34fba67c79 --- /dev/null +++ b/mlir/integration_test/data/test.mtx @@ -0,0 +1,15 @@ +%%MatrixMarket matrix coordinate real general +% +% This is a test sparse matrix in Matrix Market Exchange Format. +% see https://math.nist.gov/MatrixMarket +% +5 5 9 +1 1 1.0 +1 4 1.4 +2 2 2.0 +2 5 2.5 +3 3 3.0 +4 4 4.0 +4 1 4.1 +5 5 5.0 +5 2 5.2 diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index c71caf06ee09a..373df9f1468dc 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_OPTIONAL_SOURCES CRunnerUtils.cpp + SparseUtils.cpp ExecutionEngine.cpp RunnerUtils.cpp OptUtils.cpp @@ -70,6 +71,7 @@ add_mlir_library(MLIRJitRunner add_mlir_library(mlir_c_runner_utils SHARED CRunnerUtils.cpp + SparseUtils.cpp EXCLUDE_FROM_LIBMLIR ) @@ -77,6 +79,7 @@ set_property(TARGET mlir_c_runner_utils PROPERTY CXX_STANDARD 11) add_mlir_library(mlir_c_runner_utils_static CRunnerUtils.cpp + SparseUtils.cpp EXCLUDE_FROM_LIBMLIR ) diff --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseUtils.cpp new file mode 100644 index 0000000000000..6942a7b260c5e --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseUtils.cpp @@ -0,0 +1,172 @@ +//===- SparseUtils.cpp - Sparse Utils for MLIR execution ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a light-weight runtime library that is useful for +// sparse tensor manipulations. The functionality provided in this library +// is meant to simplify benchmarking, testing, and debugging MLIR code that +// operates on sparse tensors. The provided functionality is **not** part +// of core MLIR, however. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +//===----------------------------------------------------------------------===// +// +// Internal support for reading matrices in the Matrix Market Exchange Format. +// See https://math.nist.gov/MatrixMarket for details on this format. 
+// +//===----------------------------------------------------------------------===// + +// Helper to convert string to lower case. +static char *toLower(char *token) { + for (char *c = token; *c; c++) + *c = tolower(*c); + return token; +} + +// Read the header of a general sparse matrix of type real. +// +// TODO: support other formats as well? +// +static void readHeader(FILE *file, char *name, uint64_t *m, uint64_t *n, + uint64_t *nnz) { + char line[1025]; + char header[64]; + char object[64]; + char format[64]; + char field[64]; + char symmetry[64]; + // Read header line. + if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field, + symmetry) != 5) { + fprintf(stderr, "Corrupt header in %s\n", name); + exit(1); + } + // Make sure this is a general sparse matrix. + if (strcmp(toLower(header), "%%matrixmarket") || + strcmp(toLower(object), "matrix") || + strcmp(toLower(format), "coordinate") || strcmp(toLower(field), "real") || + strcmp(toLower(symmetry), "general")) { + fprintf(stderr, + "Cannot find a general sparse matrix with type real in %s\n", name); + exit(1); + } + // Skip comments. + while (1) { + if (!fgets(line, 1025, file)) { + fprintf(stderr, "Cannot find data in %s\n", name); + exit(1); + } + if (line[0] != '%') + break; + } + // Next line contains M N NNZ. + if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64, m, n, nnz) != 3) { + fprintf(stderr, "Cannot find size in %s\n", name); + exit(1); + } +} + +// Read next data item. +static void readItem(FILE *file, char *name, uint64_t *i, uint64_t *j, + double *d) { + if (fscanf(file, "%" PRIu64 " %" PRIu64 " %lg\n", i, j, d) != 3) { + fprintf(stderr, "Cannot find next data item in %s\n", name); + exit(1); + } + // Translate 1-based to 0-based. + *i = *i - 1; + *j = *j - 1; +} + +//===----------------------------------------------------------------------===// +// +// Public API of the sparse runtime library. +// +// Enables MLIR code to read a matrix in Matrix Market Exchange Format +// as follows: +// +// call @openMatrix("A.mtx", %m, %n, %nnz) : (!llvm.ptr, +// memref, +// memref, +// memref) -> () +// .... prepare reading in m x n matrix A with nnz nonzero elements .... +// %u = load %nnz[] : memref +// scf.for %k = %c0 to %u step %c1 { +// call @readMatrixItem(%i, %j, %d) : (memref, +// memref, memref) -> () +// .... process next nonzero element A[i][j] = d .... +// } +// call @closeMatrix() : () -> () +// +// The implementation is *not* thread-safe. Also, only *one* matrix file can +// be open at the time. A matrix file must be closed before reading in a next. +// +// Note that input parameters mimic the layout of a MemRef: +// struct MemRef { +// T *base; +// T *data; +// int64_t off; +// } +//===----------------------------------------------------------------------===// + +// Currently open matrix. This is *not* thread-safe or re-entrant. +static FILE *sparseFile = nullptr; +static char *sparseFilename = nullptr; + +extern "C" void openMatrix(char *filename, uint64_t *mbase, uint64_t *mdata, + int64_t moff, uint64_t *nbase, uint64_t *ndata, + int64_t noff, uint64_t *nnzbase, uint64_t *nnzdata, + int64_t nnzoff) { + if (sparseFile != nullptr) { + fprintf(stderr, "Other file still open %s vs. 
%s\n", sparseFilename, + filename); + exit(1); + } + sparseFile = fopen(filename, "r"); + if (!sparseFile) { + fprintf(stderr, "Cannot find %s\n", filename); + exit(1); + } + sparseFilename = filename; + readHeader(sparseFile, filename, mdata, ndata, nnzdata); +} + +extern "C" void readMatrixItem(uint64_t *ibase, uint64_t *idata, int64_t ioff, + uint64_t *jbase, uint64_t *jdata, int64_t joff, + double *dbase, double *ddata, int64_t doff) { + if (sparseFile == nullptr) { + fprintf(stderr, "Cannot read item from unopened matrix\n"); + exit(1); + } + readItem(sparseFile, sparseFilename, idata, jdata, ddata); +} + +extern "C" void closeMatrix() { + if (sparseFile == nullptr) { + fprintf(stderr, "Cannot close unopened matrix\n"); + exit(1); + } + fclose(sparseFile); + sparseFile = nullptr; + sparseFilename = nullptr; +} + +// Helper method to read sparse matrix filenames from the environment, defined +// with the naming convention ${SPARSE_MATRIX0}, ${SPARSE_MATRIX1}, etc. +extern "C" char *getSparseMatrix(uint64_t id) { + char var[80]; + sprintf(var, "SPARSE_MATRIX%lu", id); + char *env = getenv(var); + return env; +} From 9eff07a746a9e6a9b105c12b5e28073360fa8065 Mon Sep 17 00:00:00 2001 From: Chris Palmer Date: Tue, 6 Oct 2020 13:01:50 -0400 Subject: [PATCH 181/321] [libc++] Add assert to check bounds in `constexpr string_view::operator[]` Differential Revision: https://reviews.llvm.org/D88864 --- libcxx/include/string_view | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 8a684a8f966ce..884bcf806c454 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -278,7 +278,9 @@ public: // [string.view.access], element access _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY - const_reference operator[](size_type __pos) const _NOEXCEPT { return __data[__pos]; } + const_reference operator[](size_type __pos) const _NOEXCEPT { + return _LIBCPP_ASSERT(__pos < size(), "string_view[] index out of bounds"), __data[__pos]; + } _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY const_reference at(size_type __pos) const From 4540d6624838af2b190dfb33802528eb4bfb3fb8 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Tue, 6 Oct 2020 11:32:57 -0700 Subject: [PATCH 182/321] [CMake] Track TSan's dependency on C++ headers TSan relies on C++ headers, so when libc++ is being built as part of the runtimes build, include an explicit dependency on cxx-headers which is the same approach that's already used for other sanitizers. 
Differential Revision: https://reviews.llvm.org/D88912 --- compiler-rt/cmake/Modules/AddCompilerRT.cmake | 6 +++++- compiler-rt/lib/tsan/CMakeLists.txt | 10 +++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake index f2f0b5ecde590..90a946a0df7da 100644 --- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake +++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake @@ -140,6 +140,7 @@ endmacro() # CFLAGS # LINK_FLAGS # DEFS +# DEPS # LINK_LIBS (only for shared library) # OBJECT_LIBS # PARENT_TARGET @@ -152,7 +153,7 @@ function(add_compiler_rt_runtime name type) cmake_parse_arguments(LIB "" "PARENT_TARGET" - "OS;ARCHS;SOURCES;CFLAGS;LINK_FLAGS;DEFS;LINK_LIBS;OBJECT_LIBS;ADDITIONAL_HEADERS" + "OS;ARCHS;SOURCES;CFLAGS;LINK_FLAGS;DEFS;DEPS;LINK_LIBS;OBJECT_LIBS;ADDITIONAL_HEADERS" ${ARGN}) set(libnames) # Until we support this some other way, build compiler-rt runtime without LTO @@ -329,6 +330,9 @@ function(add_compiler_rt_runtime name type) RUNTIME DESTINATION ${install_dir_${libname}} ${COMPONENT_OPTION}) endif() + if(LIB_DEPS) + add_dependencies(${libname} ${LIB_DEPS}) + endif() set_target_properties(${libname} PROPERTIES OUTPUT_NAME ${output_name_${libname}}) set_target_properties(${libname} PROPERTIES FOLDER "Compiler-RT Runtime") diff --git a/compiler-rt/lib/tsan/CMakeLists.txt b/compiler-rt/lib/tsan/CMakeLists.txt index c99b16d8aaa3a..3c6731d79dc45 100644 --- a/compiler-rt/lib/tsan/CMakeLists.txt +++ b/compiler-rt/lib/tsan/CMakeLists.txt @@ -25,6 +25,10 @@ append_list_if(SANITIZER_LIMIT_FRAME_SIZE -Wframe-larger-than=530 append_list_if(COMPILER_RT_HAS_WGLOBAL_CONSTRUCTORS_FLAG -Wglobal-constructors TSAN_RTL_CFLAGS) +if (TARGET cxx-headers OR HAVE_LIBCXX) + set(TSAN_DEPS cxx-headers) +endif() + set(TSAN_SOURCES rtl/tsan_clock.cpp rtl/tsan_debugging.cpp @@ -142,6 +146,7 @@ if(APPLE) RTSanitizerCommonSymbolizer RTUbsan CFLAGS ${TSAN_RTL_CFLAGS} + DEPS ${TSAN_DEPS} LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS} LINK_LIBS ${TSAN_LINK_LIBS} objc PARENT_TARGET tsan) @@ -150,7 +155,8 @@ if(APPLE) ARCHS ${TSAN_SUPPORTED_ARCH} SOURCES ${TSAN_SOURCES} ${TSAN_CXX_SOURCES} ${TSAN_ASM_SOURCES} ADDITIONAL_HEADERS ${TSAN_HEADERS} - CFLAGS ${TSAN_RTL_CFLAGS}) + CFLAGS ${TSAN_RTL_CFLAGS} + DEPS ${TSAN_DEPS}) # Build and check Go runtime. set(BUILDGO_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/go/buildgo.sh) @@ -214,6 +220,7 @@ else() $ ADDITIONAL_HEADERS ${TSAN_HEADERS} CFLAGS ${TSAN_RTL_CFLAGS} + DEPS ${TSAN_DEPS} PARENT_TARGET tsan) add_compiler_rt_runtime(clang_rt.tsan_cxx STATIC @@ -222,6 +229,7 @@ $ ADDITIONAL_HEADERS ${TSAN_HEADERS} CFLAGS ${TSAN_RTL_CFLAGS} + DEPS ${TSAN_DEPS} PARENT_TARGET tsan) list(APPEND TSAN_RUNTIME_LIBRARIES clang_rt.tsan-${arch} clang_rt.tsan_cxx-${arch}) From 4d1d8ae7100ec3c7e1709addb7b3ec6f9ad0b44f Mon Sep 17 00:00:00 2001 From: Jianzhou Zhao Date: Mon, 5 Oct 2020 16:56:50 +0000 Subject: [PATCH 183/321] Replace shadow space zero-out with madvise at mmap After D88686, munmap uses MADV_DONTNEED to ensure pages read back as zero on the next access. Because the entire shadow space is created with MAP_PRIVATE and MAP_ANONYMOUS, the first access also sees zero-filled values. So it is fine not to zero out the data explicitly and to use madvise(MADV_DONTNEED) at mmap time instead. This reduces runtime overhead.
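To make the reasoning concrete, here is a minimal standalone C++ sketch (not part of the patch) of the Linux behavior the change relies on: for MAP_PRIVATE | MAP_ANONYMOUS mappings, the first touch after mmap and the first touch after madvise(MADV_DONTNEED) both observe zero-filled pages, so no explicit zero-out of the shadow is needed in either case.

#include <cassert>
#include <cstddef>
#include <cstring>
#include <sys/mman.h>

int main() {
  const std::size_t kSize = 1 << 20;
  // Anonymous private mappings are demand-zeroed on first touch.
  void *mem = mmap(nullptr, kSize, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  assert(mem != MAP_FAILED);
  char *p = static_cast<char *>(mem);
  assert(p[0] == 0); // First access after mmap sees zeroes.

  std::memset(p, 0xff, kSize); // Dirty every page.

  // MADV_DONTNEED discards the dirty pages; the next access faults in
  // fresh zero-filled pages, which is why the interceptors can release
  // shadow pages instead of storing zero labels.
  int rc = madvise(p, kSize, MADV_DONTNEED);
  assert(rc == 0);
  assert(p[0] == 0);

  munmap(p, kSize);
  return 0;
}

This is also why mmap and munmap can share one ReleaseShadowMemoryPagesToOS helper: releasing the shadow range is strictly cheaper than writing zero labels across it.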
Reviewed-by: morehouse Differential Revision: https://reviews.llvm.org/D88755 --- compiler-rt/lib/dfsan/dfsan_interceptors.cpp | 24 ++++-- .../test/dfsan/munmap_release_shadow.c | 54 ------------ compiler-rt/test/dfsan/release_shadow_space.c | 83 +++++++++++++++++++ 3 files changed, 99 insertions(+), 62 deletions(-) delete mode 100644 compiler-rt/test/dfsan/munmap_release_shadow.c create mode 100644 compiler-rt/test/dfsan/release_shadow_space.c diff --git a/compiler-rt/lib/dfsan/dfsan_interceptors.cpp b/compiler-rt/lib/dfsan/dfsan_interceptors.cpp index 5ab7c2b4828ca..e322d18f7d331 100644 --- a/compiler-rt/lib/dfsan/dfsan_interceptors.cpp +++ b/compiler-rt/lib/dfsan/dfsan_interceptors.cpp @@ -20,7 +20,19 @@ using namespace __sanitizer; -static bool interceptors_initialized; +namespace { + +bool interceptors_initialized; + +void ReleaseShadowMemoryPagesToOS(void *addr, SIZE_T length) { + uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr); + void *end_addr = + (void *)((uptr)addr + RoundUpTo(length, GetPageSizeCached())); + uptr end_shadow_addr = (uptr)__dfsan::shadow_for(end_addr); + ReleaseMemoryPagesToOS(beg_shadow_addr, end_shadow_addr); +} + +} // namespace INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags, int fd, OFF_T offset) { @@ -34,7 +46,7 @@ INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags, res = REAL(mmap)(addr, length, prot, flags, fd, offset); if (res != (void *)-1) - dfsan_set_label(0, res, RoundUpTo(length, GetPageSize())); + ReleaseShadowMemoryPagesToOS(res, length); return res; } @@ -42,18 +54,14 @@ INTERCEPTOR(void *, mmap64, void *addr, SIZE_T length, int prot, int flags, int fd, OFF64_T offset) { void *res = REAL(mmap64)(addr, length, prot, flags, fd, offset); if (res != (void *)-1) - dfsan_set_label(0, res, RoundUpTo(length, GetPageSize())); + ReleaseShadowMemoryPagesToOS(res, length); return res; } INTERCEPTOR(int, munmap, void *addr, SIZE_T length) { int res = REAL(munmap)(addr, length); if (res != -1) { - uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr); - void *end_addr = - (void *)((uptr)addr + RoundUpTo(length, GetPageSizeCached())); - uptr end_shadow_addr = (uptr)__dfsan::shadow_for(end_addr); - ReleaseMemoryPagesToOS(beg_shadow_addr, end_shadow_addr); + ReleaseShadowMemoryPagesToOS(addr, length); } return res; } diff --git a/compiler-rt/test/dfsan/munmap_release_shadow.c b/compiler-rt/test/dfsan/munmap_release_shadow.c deleted file mode 100644 index 03197dfb86413..0000000000000 --- a/compiler-rt/test/dfsan/munmap_release_shadow.c +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %clang_dfsan %s -o %t && %run %t - -#include -#include -#include -#include -#include -#include -#include - -size_t get_rss_kb() { - long rss = 0L; - FILE *f = NULL; - assert((f = fopen("/proc/self/statm", "r"))); - assert(fscanf(f, "%*s%ld", &rss) == 1); - fclose(f); - return ((size_t)rss * (size_t)sysconf(_SC_PAGESIZE)) >> 10; -} - -int main(int argc, char **argv) { - const size_t map_size = 100 << 20; - size_t before = get_rss_kb(); - - char *p = mmap(NULL, map_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - const dfsan_label label = dfsan_create_label("l", 0); - char val = 0xff; - dfsan_set_label(label, &val, sizeof(val)); - memset(p, val, map_size); - size_t after_mmap = get_rss_kb(); - - munmap(p, map_size); - size_t after_munmap = get_rss_kb(); - - p = mmap(NULL, map_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - dfsan_set_label(label, &val, sizeof(val)); - memset(p, val, map_size); - 
size_t after_mmap2 = get_rss_kb(); - - fprintf(stderr, "RSS at start: %td, after mmap: %td, after mumap: %td, after mmap2: %td\n", - before, after_mmap, after_munmap, after_mmap2); - - // The memory after mmap increases 3 times of map_size because the overhead of - // shadow memory is 2x. - const size_t mmap_cost_kb = 3 * (map_size >> 10); - assert(after_mmap >= before + mmap_cost_kb); - // OS does not release memory to the same level as the start of the program. - // The assert checks the memory after munmap up to a delta. - const size_t delta = 50000; - assert(after_mmap2 <= after_mmap + delta); - - return 0; -} diff --git a/compiler-rt/test/dfsan/release_shadow_space.c b/compiler-rt/test/dfsan/release_shadow_space.c new file mode 100644 index 0000000000000..40c0f3841eee4 --- /dev/null +++ b/compiler-rt/test/dfsan/release_shadow_space.c @@ -0,0 +1,83 @@ +// RUN: %clang_dfsan %s -o %t && %run %t + +#include +#include +#include +#include +#include +#include +#include + +size_t get_rss_kb() { + size_t ret = 0; + pid_t pid = getpid(); + + char fname[256]; + sprintf(fname, "/proc/%ld/task/%ld/smaps", (long)pid, (long)pid); + FILE *f = fopen(fname, "r"); + assert(f); + + char buf[256]; + while (fgets(buf, sizeof(buf), f) != NULL) { + int64_t rss; + if (sscanf(buf, "Rss: %ld kB", &rss) == 1) + ret += rss; + } + assert(feof(f)); + fclose(f); + + return ret; +} + +int main(int argc, char **argv) { + const size_t map_size = 100 << 20; + size_t before = get_rss_kb(); + + // mmap and touch all addresses. The overhead is 1x. + char *p = mmap(NULL, map_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + memset(p, 0xff, map_size); + size_t after_mmap = get_rss_kb(); + + // store labels to all addresses. The overhead is 2x. + const dfsan_label label = dfsan_create_label("l", 0); + char val = 0xff; + dfsan_set_label(label, &val, sizeof(val)); + memset(p, val, map_size); + size_t after_mmap_and_set_label = get_rss_kb(); + + // fixed-mmap the same address. The OS recycles pages and reinitializes data + // at the address. This should be the same as calling munmap. + p = mmap(p, map_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + size_t after_fixed_mmap = get_rss_kb(); + + // store labels to all addresses. + memset(p, val, map_size); + size_t after_mmap_and_set_label2 = get_rss_kb(); + + // munmap the addresses. + munmap(p, map_size); + size_t after_munmap = get_rss_kb(); + + fprintf( + stderr, + "RSS at start: %zu, after mmap: %zu, after mmap+set label: %zu, after " + "fixed map: %zu, after another mmap+set label: %zu, after munmap: %zu\n", + before, after_mmap, after_mmap_and_set_label, after_fixed_mmap, + after_mmap_and_set_label2, after_munmap); + + const size_t mmap_cost_kb = map_size >> 10; + const size_t mmap_shadow_cost_kb = 2 * mmap_cost_kb; + assert(after_mmap >= before + mmap_cost_kb); + assert(after_mmap_and_set_label >= after_mmap + mmap_shadow_cost_kb); + assert(after_mmap_and_set_label2 >= before + mmap_cost_kb + mmap_shadow_cost_kb); + + // RSS may not drop back after munmap to the same level as at the + // start of the program. The asserts check the memory up to a delta. + const size_t delta = 5000; + assert(after_fixed_mmap <= before + delta); + assert(after_munmap <= before + delta); + + return 0; +} From 7193f727988360acb7037b42851f0a6fae29af9b Mon Sep 17 00:00:00 2001 From: "Duncan P. N.
Exon Smith" Date: Tue, 6 Oct 2020 18:09:19 -0400 Subject: [PATCH 184/321] docs: Emphasize ArrayRef over SmallVectorImpl The section on SmallVector has a note about preferring SmallVectorImpl for APIs but doesn't mention ArrayRef. Although ArrayRef is discussed elsewhere, let's re-emphasize it here. Differential Revision: https://reviews.llvm.org/D49881 --- llvm/docs/ProgrammersManual.rst | 37 ++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst index 8d028a64b65fb..d9925d69d9f6c 100644 --- a/llvm/docs/ProgrammersManual.rst +++ b/llvm/docs/ProgrammersManual.rst @@ -1541,30 +1541,43 @@ SmallVector has grown a few other minor advantages over std::vector, causing .. note:: - Prefer to use ``SmallVectorImpl<T>`` as a parameter type. + Prefer to use ``ArrayRef`` or ``SmallVectorImpl`` as a parameter type. - In APIs that don't care about the "small size" (most?), prefer to use - the ``SmallVectorImpl<T>`` class, which is basically just the "vector - header" (and methods) without the elements allocated after it. Note that - ``SmallVector<T, N>`` inherits from ``SmallVectorImpl<T>`` so the - conversion is implicit and costs nothing. E.g. + It's rarely appropriate to use ``SmallVector<T, N>`` as a parameter type. + If an API only reads from the vector, it should use :ref:`ArrayRef + <dss_arrayref>`. Even if an API updates the vector, the "small size" is + unlikely to be relevant; such an API should use the ``SmallVectorImpl<T>`` + class, which is the "vector header" (and methods) without the elements + allocated after it. Note that ``SmallVector<T, N>`` inherits from + ``SmallVectorImpl<T>`` so the conversion is implicit and costs nothing. E.g. .. code-block:: c++ - // BAD: Clients cannot pass e.g. SmallVector<Foo, 8>. + // DISCOURAGED: Clients cannot pass e.g. raw arrays. + hardcodedContiguousStorage(const SmallVectorImpl<Foo> &In); + // ENCOURAGED: Clients can pass any contiguous storage of Foo. + allowsAnyContiguousStorage(ArrayRef<Foo> In); + + void someFunc1() { + Foo Vec[] = { /* ... */ }; + hardcodedContiguousStorage(Vec); // Error. + allowsAnyContiguousStorage(Vec); // Works. + } + + // DISCOURAGED: Clients cannot pass e.g. SmallVector<Foo, 8>. hardcodedSmallSize(SmallVector<Foo, 2> &Out); - // GOOD: Clients can pass any SmallVector. + // ENCOURAGED: Clients can pass any SmallVector. allowsAnySmallSize(SmallVectorImpl<Foo> &Out); - void someFunc() { + void someFunc2() { SmallVector<Foo, 8> Vec; hardcodedSmallSize(Vec); // Error. allowsAnySmallSize(Vec); // Works. } - Even though it has "``Impl``" in the name, this is so widely used that - it really isn't "private to the implementation" anymore. A name like - ``SmallVectorHeader`` would be more appropriate. + Even though it has "``Impl``" in the name, SmallVectorImpl is widely used + and is no longer "private to the implementation". A name like + ``SmallVectorHeader`` might be more appropriate. .. _dss_vector: From b45b5166f8f91e601ebd7bc73bb6800155c4c9bc Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 6 Oct 2020 15:03:13 -0700 Subject: [PATCH 185/321] [RuntimeDyld][COFF] Report fatal error on error, rather than emitting a diagnostic. Report a fatal error if an IMAGE_REL_AMD64_ADDR32NB relocation cannot be applied due to an out-of-range target. Previously we emitted a diagnostic to llvm::errs and continued. Patch by Dale Martin. Thanks Dale!
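For readers unfamiliar with this relocation, here is a minimal standalone sketch of the range check the patch turns into a fatal error. It is illustrative only: fatal is a hypothetical stand-in for llvm::report_fatal_error, and the addresses are made up.

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Hypothetical stand-in for llvm::report_fatal_error.
[[noreturn]] static void fatal(const char *Msg) {
  std::fprintf(stderr, "fatal error: %s\n", Msg);
  std::abort();
}

// IMAGE_REL_AMD64_ADDR32NB stores a 32-bit offset relative to the image
// base, so the target must lie within [ImageBase, ImageBase + 4 GiB).
static uint32_t computeAddr32NB(uint64_t Value, uint64_t ImageBase) {
  if (Value < ImageBase || (Value - ImageBase) > UINT32_MAX)
    fatal("IMAGE_REL_AMD64_ADDR32NB relocation requires an "
          "ordered section layout");
  return static_cast<uint32_t>(Value - ImageBase);
}

int main() {
  // In range: resolves to a 32-bit offset from the image base.
  std::printf("0x%x\n", computeAddr32NB(0x140001000ULL, 0x140000000ULL));
  // Out of range (more than 4 GiB past the image base) would call fatal():
  // computeAddr32NB(0x240001000ULL, 0x140000000ULL);
  return 0;
}

Failing fast surfaces a mis-ordered section layout at JIT-link time, instead of silently writing a zero offset and continuing with a relocation that points at the image base.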
--- .../RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index ebe3ca33d3089..9df3e2e3c3bf1 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -113,11 +113,10 @@ class RuntimeDyldCOFFX86_64 : public RuntimeDyldCOFF { // The MemoryManager can make sure this is always true by forcing the // memory layout to be: CodeSection < ReadOnlySection < ReadWriteSection. const uint64_t ImageBase = getImageBase(); - if (Value < ImageBase || ((Value - ImageBase) > UINT32_MAX)) { - llvm::errs() << "IMAGE_REL_AMD64_ADDR32NB relocation requires an" - << "ordered section layout.\n"; - write32BitOffset(Target, 0, 0); - } else { + if (Value < ImageBase || ((Value - ImageBase) > UINT32_MAX)) + report_fatal_error("IMAGE_REL_AMD64_ADDR32NB relocation requires an " + "ordered section layout"); + else { write32BitOffset(Target, RE.Addend, Value - ImageBase); } break; From 5e4409f30817798377689ef6c229f1c82815e6e8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 6 Oct 2020 18:13:44 -0400 Subject: [PATCH 186/321] Fix out-of-tree clang build due to sysexits change The sysexits change broke building clang out of tree against llvm. https://reviews.llvm.org/D88467 --- llvm/include/llvm/Config/config.h.cmake | 3 --- llvm/include/llvm/Config/llvm-config.h.cmake | 3 +++ llvm/include/llvm/Support/ExitCodes.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index 9ad0d827dfd8d..fc3f94570e984 100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -208,9 +208,6 @@ /* Define to 1 if you have the <sys/types.h> header file. */ #cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H} -/* Define to 1 if you have the <sysexits.h> header file. */ -#cmakedefine HAVE_SYSEXITS_H ${HAVE_SYSEXITS_H} - /* Define if the setupterm() function is supported this platform. */ #cmakedefine LLVM_ENABLE_TERMINFO ${LLVM_ENABLE_TERMINFO} diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index ee299876825ea..452c56e408de0 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -88,4 +88,7 @@ /* Define if LLVM was built with a dependency to the tensorflow compiler */ #cmakedefine LLVM_HAVE_TF_AOT +/* Define to 1 if you have the <sysexits.h> header file. */ +#cmakedefine HAVE_SYSEXITS_H ${HAVE_SYSEXITS_H} + #endif diff --git a/llvm/include/llvm/Support/ExitCodes.h b/llvm/include/llvm/Support/ExitCodes.h index 2715c5ca4128b..b9041f5557d52 100644 --- a/llvm/include/llvm/Support/ExitCodes.h +++ b/llvm/include/llvm/Support/ExitCodes.h @@ -16,7 +16,7 @@ #ifndef LLVM_SUPPORT_EXITCODES_H #define LLVM_SUPPORT_EXITCODES_H -#include "llvm/Config/config.h" +#include "llvm/Config/llvm-config.h" #if HAVE_SYSEXITS_H #include <sysexits.h> From 7060920bd1f70b778105703a5c95066658ed5886 Mon Sep 17 00:00:00 2001 From: "Ahmed S.
Taei" Date: Tue, 6 Oct 2020 03:53:00 +0000 Subject: [PATCH 187/321] Relax FuseTensorReshapeOpAsproducer identity mapping constraint Differential Revision: https://reviews.llvm.org/D88869 --- .../Linalg/Transforms/FusionOnTensors.cpp | 20 ++-- mlir/test/Dialect/Linalg/fusion-tensor.mlir | 97 +++++++++++++++++++ 2 files changed, 110 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp index a62b1ada2c187..ac57d5f97c1d4 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp @@ -326,7 +326,7 @@ static bool isTensorReshapeOpFusible(TensorReshapeOp reshapeOp, if ((asProducer && returnType.getRank() < operandType.getRank()) || (!asProducer && operandType.getRank() < returnType.getRank())) return false; - return useIndexMap.isIdentity(); + return useIndexMap.isPermutation(); } /// Based on the type of `op` create a linalg op of the same type, i.e. if `op` @@ -381,10 +381,13 @@ struct FuseTensorReshapeOpAsProducer { return attr.cast().getValue(); })); + // Accepted consumer maps are either identity or permutation. + auto invMap = inversePermutation(fusedIndexMaps[consumerIdx]); + // Compute the indexing map to use for the operand of the producer. - AffineMap modifiedMap = linearizeCollapsedDims( - fusedIndexMaps[consumerIdx], producer.getResultType().getShape(), - producer.getReassociationMaps()); + AffineMap modifiedMap = + linearizeCollapsedDims(invMap, producer.getResultType().getShape(), + producer.getReassociationMaps()); for (AffineExpr expr : modifiedMap.getResults()) { if (!expr.isPureAffine()) return nullptr; @@ -439,10 +442,13 @@ struct FuseTensorReshapeOpAsConsumer { producer.indexing_maps(), [](Attribute attr) -> AffineMap { return attr.cast().getValue(); })); + + auto invMap = inversePermutation(producer.getOutputIndexingMap(0)); + // Compute the indexing map to use for the operand of the producer. 
- AffineMap modifiedMap = linearizeCollapsedDims( - producer.getOutputIndexingMap(0), consumer.getSrcType().getShape(), - consumer.getReassociationMaps()); + AffineMap modifiedMap = + linearizeCollapsedDims(invMap, consumer.getSrcType().getShape(), + consumer.getReassociationMaps()); for (AffineExpr expr : modifiedMap.getResults()) { if (!expr.isPureAffine()) return nullptr; diff --git a/mlir/test/Dialect/Linalg/fusion-tensor.mlir b/mlir/test/Dialect/Linalg/fusion-tensor.mlir index ccadff54e40b7..3f8b0680d7a44 100644 --- a/mlir/test/Dialect/Linalg/fusion-tensor.mlir +++ b/mlir/test/Dialect/Linalg/fusion-tensor.mlir @@ -558,3 +558,100 @@ func @indexed_generic_op_reshape_consumer_fusion(%arg0 : tensor) // CHECK: linalg.indexed_generic // CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] // CHECK-NOT: linalg.tensor_reshape + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1 + d2 * 7)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> + +#map0 = affine_map<(d0, d1, d2) -> (d0)> +#map1 = affine_map<(d0, d1, d2) -> (d1, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d0, d2, d1)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func @generic_op_021_permultation_reshape_producer_fusion(%arg0 : tensor<3x35xf32>) -> tensor<3x7x5xf32> { + %0 = linalg.tensor_reshape %arg0 [#map0, #map1] : tensor<3x35xf32> into tensor<3x5x7xf32> + %1 = linalg.generic {indexing_maps = [#map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<3x5x7xf32>) { + ^bb0(%arg2: f32): // no predecessors + linalg.yield %arg2 : f32 + } -> tensor<3x7x5xf32> + return %1 : tensor<3x7x5xf32> +} + +// CHECK-LABEL: func @generic_op_021_permultation_reshape_producer_fusion +// CHECK-NOT: linalg.tensor_reshape +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// CHECK-NOT: linalg.tensor_reshape + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d2, d0 * 7 + d1)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> + +#map0 = affine_map<(d0, d1, d2) -> (d0)> +#map1 = affine_map<(d0, d1, d2) -> (d1, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func @generic_op_120_permultation_reshape_producer_fusion(%arg0 : tensor<3x35xf32>) -> tensor<5x7x3xf32> { + %0 = linalg.tensor_reshape %arg0 [#map0, #map1] : tensor<3x35xf32> into tensor<3x5x7xf32> + %1 = linalg.generic {indexing_maps = [#map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<3x5x7xf32>) { + ^bb0(%arg2: f32): // no predecessors + linalg.yield %arg2 : f32 + } -> tensor<5x7x3xf32> + return %1 : tensor<5x7x3xf32> +} + +// CHECK-LABEL: func @generic_op_120_permultation_reshape_producer_fusion +// CHECK-NOT: linalg.tensor_reshape +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// CHECK-NOT: linalg.tensor_reshape + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d1, d0 * 7 + d2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> + +#map0 = affine_map<(d0, d1, d2) -> (d0)> +#map1 = affine_map<(d0, d1, d2) -> (d1, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d1, d0, d2)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func @generic_op_102_permultation_reshape_producer_fusion(%arg0 : tensor<3x35xf32>) -> tensor<5x3x7xf32> { + %0 = linalg.tensor_reshape %arg0 [#map0, #map1] : tensor<3x35xf32> into tensor<3x5x7xf32> + %1 = linalg.generic {indexing_maps = [#map2, #map3], 
iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<3x5x7xf32>) { + ^bb0(%arg2: f32): // no predecessors + linalg.yield %arg2 : f32 + } -> tensor<5x3x7xf32> + return %1 : tensor<5x3x7xf32> +} + +// CHECK-LABEL: func @generic_op_102_permultation_reshape_producer_fusion +// CHECK-NOT: linalg.tensor_reshape +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// CHECK-NOT: linalg.tensor_reshape + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d0 * 7 + d2)> + + +#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +#map1 = affine_map<(d0, d1, d2) -> (d1, d0, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d0)> +#map3 = affine_map<(d0, d1, d2) -> (d1, d2)> +func @generic_op_102_permultation_reshape_consumer_fusion(%arg0 : tensor<3x5x7xf32>) -> tensor<5x21xf32> { + %0 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor<3x5x7xf32>) { + ^bb0(%arg2: f32): // no predecessors + linalg.yield %arg2 : f32 + } -> tensor<5x3x7xf32> + %1 = linalg.tensor_reshape %0 [#map2, #map3] : tensor<5x3x7xf32> into tensor<5x21xf32> + return %1 : tensor<5x21xf32> +} + +// CHECK-LABEL: func @generic_op_102_permultation_reshape_consumer_fusion +// CHECK-NOT: linalg.tensor_reshape +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// CHECK-NOT: linalg.tensor_reshape From dfa70a483aad438cad826c414a5d710bab6ecf49 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 6 Oct 2020 18:43:12 -0400 Subject: [PATCH 188/321] [gn build] manually port 5e4409f308177 --- .../gn/secondary/llvm/include/llvm/Config/BUILD.gn | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index acbd66aca4ded..1ef33cd177a23 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -213,7 +213,6 @@ write_cmake_config("config") { "HAVE_SIGALTSTACK=", "HAVE_STRERROR_R=", "HAVE_SYSCONF=", - "HAVE_SYSEXITS_H=", "HAVE_SYS_IOCTL_H=", "HAVE_SYS_MMAN_H=", "HAVE_SYS_PARAM_H=", @@ -250,7 +249,6 @@ write_cmake_config("config") { "HAVE_SIGALTSTACK=1", "HAVE_STRERROR_R=1", "HAVE_SYSCONF=1", - "HAVE_SYSEXITS_H=1", "HAVE_SYS_IOCTL_H=1", "HAVE_SYS_MMAN_H=1", "HAVE_SYS_PARAM_H=1", @@ -339,9 +337,15 @@ write_cmake_config("llvm-config") { ] if (current_os == "win") { - values += [ "LLVM_ON_UNIX=" ] + values += [ + "HAVE_SYSEXITS_H=", + "LLVM_ON_UNIX=", + ] } else { - values += [ "LLVM_ON_UNIX=1" ] + values += [ + "HAVE_SYSEXITS_H=1", + "LLVM_ON_UNIX=1", + ] } if (llvm_enable_threads) { From e3b0414b0ea305396a1fcfb2821ad643b0731880 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 6 Oct 2020 15:12:23 -0700 Subject: [PATCH 189/321] [lldb] Change the xcrun (fallback) logic in GetXcodeSDK This changes the logic in GetXcodeSDK to find an SDK with xcrun. The code now executes the following steps: 1. If DEVELOPER_DIR is set in the environment, it invokes xcrun with the given developer dir. If this fails we stop and don't fall back. 2. If the shlib dir is set and exists, it invokes xcrun with the developer dir corresponding to the shlib dir. If this fails we fall back to 3. 3. We run xcrun without a developer dir. This fallback order is sketched below.
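For illustration only, here is a hedged C++ sketch of that fallback order. The helper names are hypothetical and the stubs simulate xcrun instead of shelling out to it; the real code is in the HostInfoMacOSX.mm diff that follows.

#include <cstdlib>
#include <iostream>
#include <optional>
#include <string>

// Hypothetical stand-ins for the lldb helpers. A real implementation
// would run `xcrun --show-sdk-path --sdk <name>`, optionally with
// DEVELOPER_DIR set in the environment; these stubs just simulate it.
static std::optional<std::string> runXcrun(const std::string &SDK,
                                           const std::string &DeveloperDir) {
  if (DeveloperDir == "/BrokenXcode/Contents/Developer")
    return std::nullopt; // Simulate xcrun failing for an Xcode without SDKs.
  return "/Developer/SDKs/" + SDK + ".sdk";
}

static std::optional<std::string> developerDirFromShlibDir() {
  // Pretend lldb's shared library lives inside an Xcode with no SDKs.
  return "/BrokenXcode/Contents/Developer";
}

static std::optional<std::string> findSDK(const std::string &SDK) {
  // 1. DEVELOPER_DIR set in the environment: use it and never fall back.
  if (const char *Env = std::getenv("DEVELOPER_DIR"))
    return runXcrun(SDK, Env);
  // 2. Try the developer dir derived from the shlib dir; on failure,
  //    fall through instead of giving up.
  if (auto Dir = developerDirFromShlibDir())
    if (auto Path = runXcrun(SDK, *Dir))
      return Path;
  // 3. Last resort: run xcrun without a developer dir, i.e. against the
  //    xcode-selected toolchain.
  return runXcrun(SDK, "");
}

int main() {
  std::cout << findSDK("macosx").value_or("<none>") << "\n";
  return 0;
}

The key property is that only step 1 is authoritative: an explicit DEVELOPER_DIR never falls through, while a stale shlib-derived developer dir does.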
The new behavior introduced in this patch is that we fall back to running xcrun without a developer dir if running it based on the shlib dir failed. A situation where this matters is when you're running lldb from an Xcode that has no SDKs and that is not xcode-selected. Based on lldb's shlib dir pointing into this Xcode installation, it will do an xcrun with the developer dir set to the Xcode without any SDKs, which will fail. With this patch, when that happens, we'll fall back to trying the xcode-selected Xcode by running xcrun without a developer dir. Differential revision: https://reviews.llvm.org/D88866 --- .../Host/macosx/objcxx/HostInfoMacOSX.mm | 48 +++++++++++++------ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm index b325bd2c5b745..5e06792e0fbf6 100644 --- a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm +++ b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm @@ -373,26 +373,19 @@ static void ParseOSVersion(llvm::VersionTuple &version, NSString *Key) { static std::string GetXcodeSDK(XcodeSDK sdk) { XcodeSDK::Info info = sdk.Parse(); std::string sdk_name = XcodeSDK::GetCanonicalName(info); - auto find_sdk = [](std::string sdk_name) -> std::string { - std::string xcrun_cmd; - std::string developer_dir = GetEnvDeveloperDir(); - if (developer_dir.empty()) - if (FileSpec fspec = HostInfo::GetShlibDir()) - if (FileSystem::Instance().Exists(fspec)) { - FileSpec path( - XcodeSDK::FindXcodeContentsDirectoryInPath(fspec.GetPath())); - if (path.RemoveLastPathComponent()) - developer_dir = path.GetPath(); - } + + auto xcrun = [](const std::string &sdk, + llvm::StringRef developer_dir = "") -> std::string { + std::string xcrun_cmd = "xcrun --show-sdk-path --sdk " + sdk; if (!developer_dir.empty()) - xcrun_cmd = "/usr/bin/env DEVELOPER_DIR=\"" + developer_dir + "\" "; - xcrun_cmd += "xcrun --show-sdk-path --sdk " + sdk_name; + xcrun_cmd = "/usr/bin/env DEVELOPER_DIR=\"" + developer_dir.str() + + "\" " + xcrun_cmd; int status = 0; int signo = 0; std::string output_str; lldb_private::Status error = - Host::RunShellCommand(xcrun_cmd.c_str(), FileSpec(), &status, &signo, + Host::RunShellCommand(xcrun_cmd, FileSpec(), &status, &signo, &output_str, std::chrono::seconds(15)); // Check that xcrun return something useful. @@ -414,6 +407,33 @@ FileSpec path( return output.str(); }; + auto find_sdk = [&xcrun](const std::string &sdk_name) -> std::string { + // Invoke xcrun with the developer dir specified in the environment. + std::string developer_dir = GetEnvDeveloperDir(); + if (!developer_dir.empty()) { + // Don't fall back if DEVELOPER_DIR was set. + return xcrun(sdk_name, developer_dir); + } + + // Invoke xcrun with the shlib dir. + if (FileSpec fspec = HostInfo::GetShlibDir()) { + if (FileSystem::Instance().Exists(fspec)) { + std::string contents_dir = + XcodeSDK::FindXcodeContentsDirectoryInPath(fspec.GetPath()); + llvm::StringRef shlib_developer_dir = + llvm::sys::path::parent_path(contents_dir); + if (!shlib_developer_dir.empty()) { + std::string sdk = xcrun(sdk_name, std::move(shlib_developer_dir)); + if (!sdk.empty()) + return sdk; + } + } + } + + // Invoke xcrun without a developer dir as a last resort. + return xcrun(sdk_name); + }; + std::string path = find_sdk(sdk_name); while (path.empty()) { // Try an alternate spelling of the name ("macosx10.9internal").
From 3f540c05dfe90f5c0378614e3a4ada06b967227e Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 7 Sep 2020 19:56:51 -0500 Subject: [PATCH 190/321] [Attributor] Give up early on AANoReturn::initialize If the function is not assumed `noreturn`, we should not wait for an update to mark the call site as "may-return". This has two kinds of consequences: - We have fewer iterations in many tests. - We have fewer deductions based on "known information" (since we ask earlier, point 1, and therefore assumed information is not "known" yet). The latter is an artifact that we might want to tackle properly at some point but which is not easily fixable right now. --- .../Transforms/IPO/AttributorAttributes.cpp | 11 +++++++++ .../ArgumentPromotion/X86/thiscall.ll | 4 ++-- .../Attributor/ArgumentPromotion/byval.ll | 4 ++-- .../Attributor/ArgumentPromotion/fp80.ll | 4 ++-- .../live_called_from_dead.ll | 4 ++-- .../live_called_from_dead_2.ll | 24 +++++++------------ .../Attributor/ArgumentPromotion/musttail.ll | 4 ++-- .../Attributor/IPConstantProp/PR16052.ll | 4 ++-- .../IPConstantProp/multiple_callbacks.ll | 4 ++-- .../IPConstantProp/musttail-call.ll | 4 ++-- .../Attributor/IPConstantProp/naked-return.ll | 4 ++-- .../Transforms/Attributor/alwaysinline.ll | 4 ++-- .../test/Transforms/Attributor/internalize.ll | 4 ++-- llvm/test/Transforms/Attributor/misc.ll | 4 ++-- llvm/test/Transforms/Attributor/noalias.ll | 4 ++-- .../test/Transforms/Attributor/nocapture-2.ll | 4 ++-- .../read_write_returned_arguments_scc.ll | 4 ++-- llvm/test/Transforms/Attributor/readattrs.ll | 8 +++---- llvm/test/Transforms/Attributor/returned.ll | 4 ++-- 19 files changed, 55 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 3f85d968f8e3b..1472542a4aa05 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -4008,6 +4008,17 @@ struct AANoReturnCallSite final : AANoReturnImpl { AANoReturnCallSite(const IRPosition &IRP, Attributor &A) : AANoReturnImpl(IRP, A) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoReturnImpl::initialize(A); + if (Function *F = getAssociatedFunction()) { + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos); + if (!FnAA.isAssumedNoReturn()) + indicatePessimisticFixpoint(); + } + } + /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { // TODO: Once we have call site specific value information we can provide diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll index 29f6a1bf6d3f5..b6b5d2e715cb9 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll @@ -4,8 +4,8 @@ ; we don't do that anymore. It also verifies that the combination of ; globalopt and argpromotion is able to optimize the call safely.
; -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll index 8d6603dffaa33..f9ada827e25e3 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll index 2b3cd5e92f6e2..6dffe1e893e13 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; TODO: The old pass manager cgscc run is disabled as it causes a crash on windows which is under investigation: http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/23151 ; opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll index 8426620397a56..b0e50b6b7fbf9 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal 
-attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll index 113300e98684a..e45683c6c816a 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -42,21 +42,13 @@ dead: } define internal i32 @caller(i32* %B) { -; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly -; IS__TUNIT____-LABEL: define {{[^@]+}}@caller -; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) [[ATTR0:#.*]] { -; IS__TUNIT____-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__TUNIT____-NEXT: store i32 1, i32* [[A]], align 4 -; IS__TUNIT____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) [[ATTR2:#.*]] -; IS__TUNIT____-NEXT: ret i32 0 -; -; IS__CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@caller -; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) [[ATTR0:#.*]] { -; IS__CGSCC_OPM-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) [[ATTR2:#.*]] -; IS__CGSCC_OPM-NEXT: ret i32 undef +; NOT_CGSCC_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@caller +; NOT_CGSCC_NPM-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) [[ATTR0]] { +; NOT_CGSCC_NPM-NEXT: [[A:%.*]] = alloca i32, align 4 +; NOT_CGSCC_NPM-NEXT: store i32 1, i32* [[A]], align 4 +; NOT_CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) [[ATTR2:#.*]] +; NOT_CGSCC_NPM-NEXT: ret i32 undef ; ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn 
writeonly ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller @@ -78,7 +70,7 @@ define i32 @callercaller() { ; IS__TUNIT____-SAME: () [[ATTR1:#.*]] { ; IS__TUNIT____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: store i32 2, i32* [[B]], align 4 -; IS__TUNIT____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) [[ATTR2]] +; IS__TUNIT____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) [[ATTR2:#.*]] ; IS__TUNIT____-NEXT: ret i32 0 ; ; IS__CGSCC_OPM: Function Attrs: nofree nosync nounwind readnone willreturn diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll index a0bdea726479a..34adb3ebc2324 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; PR36543 diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll index 375cbda5a5b46..2caac62c1d93a 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; 
RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll index 91bf46ca2148f..d2a6ec7bb06e3 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff 
--git a/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll b/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll index 033e809be78a6..0f062741abee7 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; PR36485 diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/naked-return.ll b/llvm/test/Transforms/Attributor/IPConstantProp/naked-return.ll index 4ef78975bf8b0..83e5390b4fba8 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/naked-return.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/naked-return.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor 
-attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/alwaysinline.ll b/llvm/test/Transforms/Attributor/alwaysinline.ll index 1c67d24e4e560..2b32c78c35026 100644 --- a/llvm/test/Transforms/Attributor/alwaysinline.ll +++ b/llvm/test/Transforms/Attributor/alwaysinline.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll index 3e485382e9be0..89c800b387ad5 100644 --- a/llvm/test/Transforms/Attributor/internalize.ll +++ b/llvm/test/Transforms/Attributor/internalize.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes ; Deep Wrapper disabled -; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_TUNIT_NPM_DISABLED,IS__TUNIT_____DISABLED,IS________OPM_DISABLED,IS__TUNIT_OPM_DISABLED -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,IS__TUNIT_____DISABLED,IS________NPM_DISABLED,IS__TUNIT_NPM_DISABLED +; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_TUNIT_NPM_DISABLED,IS__TUNIT_____DISABLED,IS________OPM_DISABLED,IS__TUNIT_OPM_DISABLED +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,IS__TUNIT_____DISABLED,IS________NPM_DISABLED,IS__TUNIT_NPM_DISABLED ; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_DISABLED,NOT_TUNIT_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,IS__CGSCC_____DISABLED,IS________OPM_DISABLED,IS__CGSCC_OPM_DISABLED ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_DISABLED,NOT_TUNIT_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,NOT_CGSCC_OPM_DISABLED,IS__CGSCC_____DISABLED,IS________NPM_DISABLED,IS__CGSCC_NPM_DISABLED diff --git a/llvm/test/Transforms/Attributor/misc.ll b/llvm/test/Transforms/Attributor/misc.ll index a5c4556ac0417..6604876d6e69e 100644 --- a/llvm/test/Transforms/Attributor/misc.ll +++ b/llvm/test/Transforms/Attributor/misc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index f60346dd71661..9f581094af6b9 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; TODO: The old pass manager cgscc run is disabled as it causes a crash on windows which is under investigation: http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/25479/steps/test-check-all/logs/FAIL%3A%20LLVM%3A%3Anoalias.ll ; opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/nocapture-2.ll b/llvm/test/Transforms/Attributor/nocapture-2.ll index 81a0646e1573a..bb5ace7cce1c5 100644 --- a/llvm/test/Transforms/Attributor/nocapture-2.ll +++ b/llvm/test/Transforms/Attributor/nocapture-2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll index caae0a71b7aef..32c597010b887 100644 --- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=16 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=16 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=12 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll index 17c819b4df181..570ebd783f8f1 100644 --- a/llvm/test/Transforms/Attributor/readattrs.ll +++ b/llvm/test/Transforms/Attributor/readattrs.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -369,14 +369,14 @@ define void @testbyval(i8* %read_only) { ; IS__TUNIT____-LABEL: define {{[^@]+}}@testbyval ; IS__TUNIT____-SAME: (i8* nocapture readonly [[READ_ONLY:%.*]]) { ; IS__TUNIT____-NEXT: call void @byval_not_readonly_1(i8* nocapture readonly [[READ_ONLY]]) [[ATTR2:#.*]] -; IS__TUNIT____-NEXT: call void @byval_not_readnone_1(i8* noalias nocapture readnone [[READ_ONLY]]) [[ATTR9:#.*]] +; IS__TUNIT____-NEXT: call void @byval_not_readnone_1(i8* noalias nocapture readnone [[READ_ONLY]]) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: readonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@testbyval ; IS__CGSCC____-SAME: (i8* nocapture nonnull readonly dereferenceable(1) [[READ_ONLY:%.*]]) [[ATTR2:#.*]] { ; IS__CGSCC____-NEXT: call void @byval_not_readonly_1(i8* noalias nocapture nonnull readonly dereferenceable(1) [[READ_ONLY]]) [[ATTR2]] -; IS__CGSCC____-NEXT: call void @byval_not_readnone_1(i8* noalias nocapture nonnull readnone dereferenceable(1) [[READ_ONLY]]) [[ATTR9:#.*]] +; IS__CGSCC____-NEXT: call void @byval_not_readnone_1(i8* noalias nocapture nonnull readnone dereferenceable(1) 
[[READ_ONLY]])
; IS__CGSCC____-NEXT:    ret void
;
  call void @byval_not_readonly_1(i8* %read_only)
diff --git a/llvm/test/Transforms/Attributor/returned.ll b/llvm/test/Transforms/Attributor/returned.ll
index b295498c79445..02fbe15171b6f 100644
--- a/llvm/test/Transforms/Attributor/returned.ll
+++ b/llvm/test/Transforms/Attributor/returned.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 ;

From c4cfe7a43512c8fadb2aa207f6d914858e2cc50e Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Wed, 9 Sep 2020 00:41:46 -0500
Subject: [PATCH 191/321] [Attributor] Ignore read accesses to constant memory

The old function attribute deduction pass ignores reads of constant
memory, and we need to copy this behavior to replace the pass completely.
The first step is constant globals. TBAA can also describe constant
accesses, and there are other possibilities. We might want to consider
asking the alias analyses that are available, but for now this is simpler
and cheaper.

---
 .../Transforms/IPO/AttributorAttributes.cpp   |  8 ++++++++
 .../ArgumentPromotion/aggregate-promote.ll    |  8 ++++----
 .../ArgumentPromotion/invalidation.ll         |  6 +++---
 llvm/test/Transforms/Attributor/readattrs.ll  | 18 ++++++++++++++++++
 4 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1472542a4aa05..2e0f034694ffb 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -6605,6 +6605,7 @@ void AAMemoryLocationImpl::categorizePtrValue(
   auto VisitValueCB = [&](Value &V, const Instruction *,
                           AAMemoryLocation::StateType &T,
                           bool Stripped) -> bool {
+    // TODO: recognize the TBAA used for constant accesses.
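+    // An early `return true` from this visitor skips the MLK bookkeeping
+    // below, i.e. it records no location for the value at all; the
+    // constant-global case added further down relies on exactly that to give
+    // reads of provably constant memory no memory effect.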
     MemoryLocationsKind MLK = NO_LOCATIONS;
     assert(!isa<GEPOperator>(V) && "GEPs should have been stripped.");
     if (isa<UndefValue>(V))
       return true;
     if (auto *Arg = dyn_cast<Argument>(&V)) {
       if (Arg->hasByValAttr())
         MLK = NO_LOCAL_MEM;
       else
         MLK = NO_ARGUMENT_MEM;
     } else if (auto *GV = dyn_cast<GlobalValue>(&V)) {
+      // Reading constant memory is not treated as a read "effect" by the
+      // function attr pass, so we won't either. Constants defined by TBAA are
+      // similar. (We know we do not write it because it is constant.)
+      if (auto *GVar = dyn_cast<GlobalVariable>(GV))
+        if (GVar->isConstant())
+          return true;
+
       if (GV->hasLocalLinkage())
         MLK = NO_GLOBAL_INTERNAL_MEM;
       else
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll
index 8c87bd01362b9..8dd54ce918ba8 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll
@@ -8,7 +8,7 @@
 @G = constant %T { i32 0, i32 0, i32 17, i32 25 }

 define internal i32 @test(%T* %p) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test
 ; IS__TUNIT____-SAME: () [[ATTR0:#.*]] {
 ; IS__TUNIT____-NEXT:  entry:
@@ -19,7 +19,7 @@ define internal i32 @test(%T* %p) {
 ; IS__TUNIT____-NEXT:    [[V:%.*]] = add i32 [[A]], [[B]]
 ; IS__TUNIT____-NEXT:    ret i32 [[V]]
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test
 ; IS__CGSCC____-SAME: () [[ATTR0:#.*]] {
 ; IS__CGSCC____-NEXT:  entry:
@@ -40,14 +40,14 @@ entry:
 }

 define i32 @caller() {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@caller
 ; IS__TUNIT____-SAME: () [[ATTR0]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[V:%.*]] = call i32 @test() [[ATTR0]]
 ; IS__TUNIT____-NEXT:    ret i32 [[V]]
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller
 ; IS__CGSCC____-SAME: () [[ATTR0]] {
 ; IS__CGSCC____-NEXT:  entry:
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll
index 095981a36665f..ffd40da1f560f 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll
@@ -12,7 +12,7 @@
 @G = constant i32 0

 define internal i32 @a(i32* %x) {
-; CHECK: Function Attrs: nofree nosync nounwind readonly willreturn
+; CHECK: Function Attrs: nofree nosync nounwind readnone willreturn
 ; CHECK-LABEL: define {{[^@]+}}@a
 ; CHECK-SAME: () [[ATTR0:#.*]] {
 ; CHECK-NEXT:  entry:
@@ -25,7 +25,7 @@ entry:
 }

 define i32 @b() {
-; CHECK: Function Attrs: nofree nosync nounwind readonly willreturn
+; CHECK: Function Attrs: nofree nosync nounwind readnone willreturn
 ; CHECK-LABEL: define {{[^@]+}}@b
 ; CHECK-SAME: () [[ATTR0]] {
 ; CHECK-NEXT:  entry:
@@ -38,7 +38,7 @@ entry:
 }

 define i32 @c() {
-; CHECK: Function Attrs: nofree nosync nounwind readonly willreturn
+; CHECK: Function Attrs: nofree nosync nounwind readnone willreturn
 ; CHECK-LABEL:
define {{[^@]+}}@c
 ; CHECK-SAME: () [[ATTR0]] {
 ; CHECK-NEXT:  entry:
diff --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll
index 570ebd783f8f1..ef7779606c47a 100644
--- a/llvm/test/Transforms/Attributor/readattrs.ll
+++ b/llvm/test/Transforms/Attributor/readattrs.ll
@@ -449,3 +449,21 @@ define void @ptr_use_chain(i8* %ptr) {
   call void @escape_i8(i8* %abc9)
   ret void
 }
+
+@constant_mem = external dso_local constant i32, align 4
+define i32 @read_only_constant_mem() {
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@read_only_constant_mem
+; IS__TUNIT____-SAME: () [[ATTR1]] {
+; IS__TUNIT____-NEXT:    [[L:%.*]] = load i32, i32* @constant_mem, align 4
+; IS__TUNIT____-NEXT:    ret i32 [[L]]
+;
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@read_only_constant_mem
+; IS__CGSCC____-SAME: () [[ATTR1]] {
+; IS__CGSCC____-NEXT:    [[L:%.*]] = load i32, i32* @constant_mem, align 4
+; IS__CGSCC____-NEXT:    ret i32 [[L]]
+;
+  %l = load i32, i32* @constant_mem
+  ret i32 %l
+}

From 7993d61177c8c6e9aa238e57a684c6cd3db3b571 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Sat, 12 Sep 2020 23:41:48 -0500
Subject: [PATCH 192/321] [Attributor] Use smarter way to determine alignment
 of GEPs

Use the same logic that already exists in other places to deal with the
base case of GEPs. Add the original Attributor talk example.

---
 .../Transforms/IPO/AttributorAttributes.cpp   | 18 ++++-
 .../ArgumentPromotion/aggregate-promote.ll    |  2 +-
 llvm/test/Transforms/Attributor/align.ll      | 78 +++++++++++++++----
 3 files changed, 81 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 2e0f034694ffb..6b07bbd1aff86 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -3848,9 +3848,23 @@ struct AAAlignFloating : AAAlignImpl {
                             AAAlign::StateType &T, bool Stripped) -> bool {
       const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V));
       if (!Stripped && this == &AA) {
+        int64_t Offset;
+        unsigned Alignment = 1;
+        if (const Value *Base =
+                GetPointerBaseWithConstantOffset(&V, Offset, DL)) {
+          Align PA = Base->getPointerAlignment(DL);
+          // BasePointerAddr + Offset = Alignment * Q for some integer Q.
+          // So we can say that the maximum power of two which is a divisor of
+          // gcd(Offset, Alignment) is an alignment.
+
+          uint32_t gcd = greatestCommonDivisor(uint32_t(abs((int32_t)Offset)),
+                                               uint32_t(PA.value()));
+          Alignment = llvm::PowerOf2Floor(gcd);
+        } else {
+          Alignment = V.getPointerAlignment(DL).value();
+        }
         // Use only IR information if we did not strip anything.
-        Align PA = V.getPointerAlignment(DL);
-        T.takeKnownMaximum(PA.value());
+        T.takeKnownMaximum(Alignment);
         T.indicatePessimisticFixpoint();
       } else {
         // Use abstract attribute information.
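The alignment arithmetic above is easy to sanity-check in isolation. A minimal
standalone sketch (plain C++17; floorPow2 and alignFromBase are illustrative
names, not code from the patch):

#include <cassert>
#include <cstdint>
#include <numeric>

// Largest power of two <= X (X > 0), standing in for llvm::PowerOf2Floor.
static uint64_t floorPow2(uint64_t X) {
  while (X & (X - 1))
    X &= X - 1; // clear the lowest set bit until a single bit remains
  return X;
}

// If Ptr == Base + Offset and Base is known aligned to BaseAlign (a power of
// two), then Ptr is aligned to floorPow2(gcd(|Offset|, BaseAlign)).
static uint64_t alignFromBase(uint64_t BaseAlign, int64_t Offset) {
  uint64_t AbsOff = Offset < 0 ? uint64_t(-Offset) : uint64_t(Offset);
  return floorPow2(std::gcd(AbsOff, BaseAlign));
}

int main() {
  assert(alignFromBase(16, 8) == 8);   // base align 16, offset 8  -> align 8
  assert(alignFromBase(8, 20) == 4);   // base align 8,  offset 20 -> align 4
  assert(alignFromBase(16, 16) == 16); // base align 16, offset 16 -> align 16
  return 0;
}

This is the computation behind the test updates that follow, e.g. the
[[B_GEP]] load in aggregate-promote.ll going from align 4 to align 8 and the
align 32 annotations in align.ll.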
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll index 8dd54ce918ba8..b175e2bb5bc49 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll @@ -26,7 +26,7 @@ define internal i32 @test(%T* %p) { ; IS__CGSCC____-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 3 ; IS__CGSCC____-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 2 ; IS__CGSCC____-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]], align 4 -; IS__CGSCC____-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 4 +; IS__CGSCC____-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 8 ; IS__CGSCC____-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] ; IS__CGSCC____-NEXT: ret i32 [[V]] ; diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll index e859194776955..0c36d4a5d07e8 100644 --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -408,13 +408,13 @@ define i32* @test10a(i32* align 32 %p) { ; NOT_CGSCC_OPM-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; NOT_CGSCC_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; NOT_CGSCC_OPM: t: -; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call i32* @test10a(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR3]] -; NOT_CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 4 +; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call align 32 i32* @test10a(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR3]] +; NOT_CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 32 ; 
NOT_CGSCC_OPM-NEXT: [[G0:%.*]] = getelementptr i32, i32* [[P]], i32 8 ; NOT_CGSCC_OPM-NEXT: br label [[E:%.*]] ; NOT_CGSCC_OPM: f: ; NOT_CGSCC_OPM-NEXT: [[G1:%.*]] = getelementptr i32, i32* [[P]], i32 8 -; NOT_CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 4 +; NOT_CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 32 ; NOT_CGSCC_OPM-NEXT: br label [[E]] ; NOT_CGSCC_OPM: e: ; NOT_CGSCC_OPM-NEXT: [[PHI:%.*]] = phi i32* [ [[G0]], [[T]] ], [ [[G1]], [[F]] ] @@ -427,13 +427,13 @@ define i32* @test10a(i32* align 32 %p) { ; IS__CGSCC_OPM-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC_OPM: t: -; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call i32* @test10a(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR4]] -; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 4 +; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call align 32 i32* @test10a(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR4]] +; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 32 ; IS__CGSCC_OPM-NEXT: [[G0:%.*]] = getelementptr i32, i32* [[P]], i32 8 ; IS__CGSCC_OPM-NEXT: br label [[E:%.*]] ; IS__CGSCC_OPM: f: ; IS__CGSCC_OPM-NEXT: [[G1:%.*]] = getelementptr i32, i32* [[P]], i32 8 -; IS__CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 4 +; IS__CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 32 ; IS__CGSCC_OPM-NEXT: br label [[E]] ; IS__CGSCC_OPM: e: ; IS__CGSCC_OPM-NEXT: [[PHI:%.*]] = phi i32* [ [[G0]], [[T]] ], [ [[G1]], [[F]] ] @@ -470,13 +470,13 @@ define i32* @test10b(i32* align 32 %p) { ; NOT_CGSCC_OPM-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; NOT_CGSCC_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; NOT_CGSCC_OPM: t: -; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call i32* @test10b(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR3]] -; NOT_CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 4 +; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call align 32 i32* @test10b(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR3]] +; NOT_CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 32 ; NOT_CGSCC_OPM-NEXT: [[G0:%.*]] = getelementptr i32, i32* [[P]], i32 8 ; NOT_CGSCC_OPM-NEXT: br label [[E:%.*]] ; NOT_CGSCC_OPM: f: ; NOT_CGSCC_OPM-NEXT: [[G1:%.*]] = getelementptr i32, i32* [[P]], i32 -8 -; NOT_CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 4 +; NOT_CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 32 ; NOT_CGSCC_OPM-NEXT: br label [[E]] ; NOT_CGSCC_OPM: e: ; NOT_CGSCC_OPM-NEXT: [[PHI:%.*]] = phi i32* [ [[G0]], [[T]] ], [ [[G1]], [[F]] ] @@ -489,13 +489,13 @@ define i32* @test10b(i32* align 32 %p) { ; IS__CGSCC_OPM-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC_OPM: t: -; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call i32* @test10b(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR4]] -; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 4 +; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call align 32 i32* @test10b(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR4]] +; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 32 ; IS__CGSCC_OPM-NEXT: [[G0:%.*]] = getelementptr i32, i32* [[P]], i32 8 ; IS__CGSCC_OPM-NEXT: br label [[E:%.*]] ; IS__CGSCC_OPM: f: ; IS__CGSCC_OPM-NEXT: [[G1:%.*]] = getelementptr i32, i32* [[P]], i32 -8 -; IS__CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 4 +; IS__CGSCC_OPM-NEXT: store i32 -1, i32* 
[[G1]], align 32
 ; IS__CGSCC_OPM-NEXT:    br label [[E]]
 ; IS__CGSCC_OPM:       e:
 ; IS__CGSCC_OPM-NEXT:    [[PHI:%.*]] = phi i32* [ [[G0]], [[T]] ], [ [[G1]], [[F]] ]
@@ -992,6 +992,56 @@ exit:
   ret i32 0
 }

+define i32* @checkAndAdvance(i32* align(16) %p) {
+; NOT_CGSCC_OPM: Function Attrs: nounwind
+; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@checkAndAdvance
+; NOT_CGSCC_OPM-SAME: (i32* nonnull readonly align 16 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) [[ATTR2]] {
+; NOT_CGSCC_OPM-NEXT:  entry:
+; NOT_CGSCC_OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P]], align 16
+; NOT_CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; NOT_CGSCC_OPM-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; NOT_CGSCC_OPM:       if.then:
+; NOT_CGSCC_OPM-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; NOT_CGSCC_OPM-NEXT:    [[CALL:%.*]] = call nonnull align 16 i32* @checkAndAdvance(i32* nonnull readonly align 16 "no-capture-maybe-returned" [[ADD_PTR]]) [[ATTR2]]
+; NOT_CGSCC_OPM-NEXT:    br label [[RETURN]]
+; NOT_CGSCC_OPM:       return:
+; NOT_CGSCC_OPM-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[CALL]], [[IF_THEN]] ], [ [[P]], [[ENTRY:%.*]] ]
+; NOT_CGSCC_OPM-NEXT:    call void @user_i32_ptr(i32* noalias nocapture nonnull readnone align 16 [[RETVAL_0]]) [[ATTR2]]
+; NOT_CGSCC_OPM-NEXT:    ret i32* [[RETVAL_0]]
+;
+; IS__CGSCC_OPM: Function Attrs: nounwind
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@checkAndAdvance
+; IS__CGSCC_OPM-SAME: (i32* nonnull readonly align 16 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) [[ATTR3]] {
+; IS__CGSCC_OPM-NEXT:  entry:
+; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P]], align 16
+; IS__CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; IS__CGSCC_OPM-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; IS__CGSCC_OPM:       if.then:
+; IS__CGSCC_OPM-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; IS__CGSCC_OPM-NEXT:    [[CALL:%.*]] = call nonnull align 16 i32* @checkAndAdvance(i32* nonnull readonly align 16 "no-capture-maybe-returned" [[ADD_PTR]]) [[ATTR3]]
+; IS__CGSCC_OPM-NEXT:    br label [[RETURN]]
+; IS__CGSCC_OPM:       return:
+; IS__CGSCC_OPM-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[CALL]], [[IF_THEN]] ], [ [[P]], [[ENTRY:%.*]] ]
+; IS__CGSCC_OPM-NEXT:    call void @user_i32_ptr(i32* noalias nocapture nonnull readnone align 16 [[RETVAL_0]]) [[ATTR3]]
+; IS__CGSCC_OPM-NEXT:    ret i32* [[RETVAL_0]]
+;
+entry:
+  %0 = load i32, i32* %p, align 4
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
+  %call = call i32* @checkAndAdvance(i32* nonnull %add.ptr)
+  br label %return
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32* [ %call, %if.then ], [ %p, %entry ]
+  call void @user_i32_ptr(i32* %retval.0)
+  ret i32* %retval.0
+}
+
 attributes #0 = { nounwind uwtable noinline }
 attributes #1 = { uwtable noinline }
 attributes #2 = { null_pointer_is_valid }

From 2f40e20613758b3e11a15494c09f4b6973673d6b Mon Sep 17 00:00:00 2001
From: Valentin Clement
Date: Tue, 6 Oct 2020 21:26:13 -0400
Subject: [PATCH 193/321] [flang][openacc] Fix device_num and device_type
 clauses for init directive

This patch fixes the device_num and device_type clauses used on the init
directive. device_num was not spelled correctly in the parser and was too
restrictive, using scalarIntConstantExpr instead of scalarIntExpr.
device_type now takes a list of ScalarIntExpr.
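For illustration, the directive forms exercised by the updated validity test
below are now accepted, e.g.:

  !$acc init device_num(i)
  !$acc init device_type(2, i, j)
  !$acc init device_num(i) device_type(i, j) if(ifCondition)

where i, j, and ifCondition are ordinary scalar variables rather than
constants or names.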
Reviewed By: kiranchandramohan

Differential Revision: https://reviews.llvm.org/D88571
---
 flang/lib/Parser/openacc-parsers.cpp         |  8 ++++----
 flang/test/Semantics/acc-clause-validity.f90 | 10 ++++++++++
 llvm/include/llvm/Frontend/OpenACC/ACC.td    |  4 ++--
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp
index 01c258325f2ce..686259b756512 100644
--- a/flang/lib/Parser/openacc-parsers.cpp
+++ b/flang/lib/Parser/openacc-parsers.cpp
@@ -57,17 +57,17 @@ TYPE_PARSER("AUTO" >> construct<AccClause>(construct<AccClause::Auto>()) ||
                        parenthesized(Parser<AccObjectList>{}))) ||
     "DEVICEPTR" >> construct<AccClause>(construct<AccClause::Deviceptr>(
                        parenthesized(Parser<AccObjectList>{}))) ||
-    "DEVICENUM" >> construct<AccClause>(construct<AccClause::DeviceNum>(
-                       parenthesized(scalarIntConstantExpr))) ||
+    "DEVICE_NUM" >> construct<AccClause>(construct<AccClause::DeviceNum>(
+                       parenthesized(scalarIntExpr))) ||
     "DEVICE_RESIDENT" >>
         construct<AccClause>(construct<AccClause::DeviceResident>(
             parenthesized(Parser<AccObjectList>{}))) ||
     ("DEVICE_TYPE"_tok || "DTYPE"_tok) >>
         construct<AccClause>(construct<AccClause::DeviceType>(parenthesized(
-            "*" >> construct<std::optional<std::list<Name>>>()))) ||
+            "*" >> construct<std::optional<std::list<ScalarIntExpr>>>()))) ||
     ("DEVICE_TYPE"_tok || "DTYPE"_tok) >>
         construct<AccClause>(construct<AccClause::DeviceType>(
-            parenthesized(maybe(nonemptyList(name))))) ||
+            parenthesized(maybe(nonemptyList(scalarIntExpr))))) ||
     "FINALIZE" >> construct<AccClause>(construct<AccClause::Finalize>()) ||
     "FIRSTPRIVATE" >> construct<AccClause>(construct<AccClause::Firstprivate>(
                           parenthesized(Parser<AccObjectList>{}))) ||
diff --git a/flang/test/Semantics/acc-clause-validity.f90 b/flang/test/Semantics/acc-clause-validity.f90
index 9683a4e02c747..9a7bfe97185be 100644
--- a/flang/test/Semantics/acc-clause-validity.f90
+++ b/flang/test/Semantics/acc-clause-validity.f90
@@ -25,11 +25,21 @@ program openacc_clause_validity
   real :: reduction_r
   logical :: reduction_l
   real(8), dimension(N, N) :: aa
+  logical :: ifCondition = .TRUE.

   !ERROR: At least one clause is required on the DECLARE directive
   !$acc declare
   real(8), dimension(N) :: a

+  !$acc init
+  !$acc init if(.TRUE.)
+  !$acc init if(ifCondition)
+  !$acc init device_num(1)
+  !$acc init device_num(i)
+  !$acc init device_type(i)
+  !$acc init device_type(2, i, j)
+  !$acc init device_num(i) device_type(i, j) if(ifCondition)
+
   !ERROR: At least one of ATTACH, COPYIN, CREATE clause must appear on the ENTER DATA directive
   !$acc enter data

diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td
index b15f8348c8f42..879bbc21b9409 100644
--- a/llvm/include/llvm/Frontend/OpenACC/ACC.td
+++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td
@@ -106,7 +106,7 @@ def ACCC_Device : Clause<"device"> {
 // 2.14.1
 def ACCC_DeviceNum : Clause<"device_num">  {
-  let flangClassValue = "ScalarIntConstantExpr";
+  let flangClassValue = "ScalarIntExpr";
 }

 // 2.7.3
@@ -121,7 +121,7 @@ def ACCC_DeviceResident : Clause<"device_resident"> {
 // 2.4
 def ACCC_DeviceType : Clause<"device_type"> {
-  let flangClassValue = "Name";
+  let flangClassValue = "ScalarIntExpr";
   let defaultValue = "*";
   let isValueOptional = 1;
   let isValueList = 1;

From d2c61d2bf9bd1efad49acba2f2751112522686aa Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Tue, 6 Oct 2020 17:54:36 -0700
Subject: [PATCH 194/321] [CodeGen][TailDuplicator] Don't duplicate blocks with
 INLINEASM_BR

Tail duplication of a block with an INLINEASM_BR may result in a PHI
node on the indirect branch. This is okay, but it also introduces a
copy for that PHI node *after* the INLINEASM_BR, which is not okay.
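Schematically (a hand-written MIR sketch, not taken from the test below),
tail duplication would otherwise produce

  bb.1:
    INLINEASM_BR &"...", ..., blockaddress(@f, %bb.3)  ; block terminator
    %1 = COPY %0   ; COPY feeding a PHI in bb.3, emitted *after* the
                   ; terminator -- never executed on the indirect edge

which is malformed MIR. Until the copy insertion point is corrected (see the
FIXME below), such blocks are simply excluded from tail duplication.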
See: https://github.com/ClangBuiltLinux/linux/issues/1125 Differential Revision: https://reviews.llvm.org/D88823 --- llvm/lib/CodeGen/TailDuplicator.cpp | 8 +++ llvm/test/CodeGen/X86/tail-dup-asm-goto.ll | 61 ++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 llvm/test/CodeGen/X86/tail-dup-asm-goto.ll diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index bd554189f12b5..f9773f74a7bdd 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -627,6 +627,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (PreRegAlloc && MI.isCall()) return false; + // TailDuplicator::appendCopies will erroneously place COPYs after + // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same + // bug that was fixed in f7a53d82c090. + // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point + // for the COPY when replacing PHIs. + if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) + return false; + if (MI.isBundle()) InstrCount += MI.getBundleSize(); else if (!MI.isPHI() && !MI.isMetaInstruction()) diff --git a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll new file mode 100644 index 0000000000000..77aa3adf0fc69 --- /dev/null +++ b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=x86_64-linux -stop-after=early-tailduplication < %s | FileCheck %s + +; Ensure that we don't duplicate a block with an "INLINEASM_BR" instruction +; during code gen. +declare void @foo() + +define i8* @test1(i8** %arg1, i8* %arg2) { + ; CHECK-LABEL: name: test1 + ; CHECK: bb.0.bb: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $rdi, $rsi + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 0, $noreg :: (load 8 from %ir.arg1) + ; CHECK: [[SUB64rr:%[0-9]+]]:gr64 = SUB64rr [[MOV64rm]], [[COPY]], implicit-def $eflags + ; CHECK: JCC_1 %bb.2, 4, implicit $eflags + ; CHECK: JMP_1 %bb.1 + ; CHECK: bb.1.bb100: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: MOV64mi32 [[COPY1]], 1, $noreg, 0, $noreg, 0 :: (store 8 into %ir.arg1) + ; CHECK: JMP_1 %bb.3 + ; CHECK: bb.2.bb106: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK: CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp + ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK: bb.3.bb110: + ; CHECK: successors: %bb.5(0x80000000), %bb.4(0x00000000) + ; CHECK: [[PHI:%[0-9]+]]:gr64 = PHI [[COPY]], %bb.2, [[MOV64rm]], %bb.1 + ; CHECK: INLINEASM_BR &"#$0 $1 $2", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42, 13 /* imm */, 0, 13 /* imm */, blockaddress(@test1, %ir-block.bb17.i.i.i), 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags + ; CHECK: JMP_1 %bb.5 + ; CHECK: bb.4.bb17.i.i.i (address-taken): + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: bb.5.kmem_cache_has_cpu_partial.exit: + ; CHECK: $rax = COPY [[PHI]] + ; CHECK: RET 0, $rax +bb: + %i28.i = load i8*, i8** %arg1, 
align 8
  %if = icmp ne i8* %i28.i, %arg2
  br i1 %if, label %bb100, label %bb106

bb100:                                            ; preds = %bb
  store i8* null, i8** %arg1, align 8
  br label %bb110

bb106:                                            ; preds = %bb
  call void @foo()
  br label %bb110

bb110:                                            ; preds = %bb106, %bb100
  %i10.1 = phi i8* [ %arg2, %bb106 ], [ %i28.i, %bb100 ]
  callbr void asm sideeffect "#$0 $1 $2", "i,i,X,~{dirflag},~{fpsr},~{flags}"(i32 42, i1 false, i8* blockaddress(@test1, %bb17.i.i.i))
          to label %kmem_cache_has_cpu_partial.exit [label %bb17.i.i.i]

bb17.i.i.i:                                       ; preds = %bb110
  br label %kmem_cache_has_cpu_partial.exit

kmem_cache_has_cpu_partial.exit:                  ; preds = %bb110
  ret i8* %i10.1
}

From 0492dd91c49ca38bb45d5294f2a6711e3d40b16f Mon Sep 17 00:00:00 2001
From: Chen Zheng
Date: Sun, 27 Sep 2020 04:47:46 -0400
Subject: [PATCH 195/321] [PowerPC] add more builtins for
 PPCTargetLowering::getTgtMemIntrinsic

Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D88374
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 435cc80dd68e6..6a05817884487 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15853,7 +15853,11 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::ppc_altivec_lvehx:
   case Intrinsic::ppc_altivec_lvewx:
   case Intrinsic::ppc_vsx_lxvd2x:
-  case Intrinsic::ppc_vsx_lxvw4x: {
+  case Intrinsic::ppc_vsx_lxvw4x:
+  case Intrinsic::ppc_vsx_lxvd2x_be:
+  case Intrinsic::ppc_vsx_lxvw4x_be:
+  case Intrinsic::ppc_vsx_lxvl:
+  case Intrinsic::ppc_vsx_lxvll: {
     EVT VT;
     switch (Intrinsic) {
     case Intrinsic::ppc_altivec_lvebx:
@@ -15866,6 +15870,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       VT = MVT::i32;
       break;
     case Intrinsic::ppc_vsx_lxvd2x:
+    case Intrinsic::ppc_vsx_lxvd2x_be:
       VT = MVT::v2f64;
       break;
     default:
@@ -15888,7 +15893,11 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::ppc_altivec_stvehx:
   case Intrinsic::ppc_altivec_stvewx:
   case Intrinsic::ppc_vsx_stxvd2x:
-  case Intrinsic::ppc_vsx_stxvw4x: {
+  case Intrinsic::ppc_vsx_stxvw4x:
+  case Intrinsic::ppc_vsx_stxvd2x_be:
+  case Intrinsic::ppc_vsx_stxvw4x_be:
+  case Intrinsic::ppc_vsx_stxvl:
+  case Intrinsic::ppc_vsx_stxvll: {
     EVT VT;
     switch (Intrinsic) {
     case Intrinsic::ppc_altivec_stvebx:
@@ -15901,6 +15910,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       VT = MVT::i32;
       break;
     case Intrinsic::ppc_vsx_stxvd2x:
+    case Intrinsic::ppc_vsx_stxvd2x_be:
       VT = MVT::v2f64;
       break;
     default:

From f05608707c641423a4cd4d5923a44cda153ebbcb Mon Sep 17 00:00:00 2001
From: Chen Zheng
Date: Sun, 27 Sep 2020 04:13:16 -0400
Subject: [PATCH 196/321] [PowerPC] implement target hook getTgtMemIntrinsic

This patch makes passes recognize PowerPC-related memory intrinsics.
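A consumer-side sketch of what the hook enables (illustrative fragment; it
assumes a TargetTransformInfo &TTI and an IntrinsicInst *II are in scope, as
in a typical pass -- only getTgtMemIntrinsic and the MemIntrinsicInfo fields
are real API here):

  MemIntrinsicInfo Info;
  if (TTI.getTgtMemIntrinsic(II, Info)) {
    // The intrinsic now looks like an ordinary memory access on Info.PtrVal,
    // so LSR can classify the pointer use as Kind=Address rather than
    // Kind=Basic -- which is exactly what the new test below checks.
    const Value *Ptr = Info.PtrVal;
    bool IsLoad = Info.ReadMem && !Info.WriteMem;
    (void)Ptr;
    (void)IsLoad;
  }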
Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D88373 --- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 42 +++++++++++++++++++ .../Target/PowerPC/PPCTargetTransformInfo.h | 1 + .../Power/memory-intrinsic.ll | 38 +++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 llvm/test/Transforms/LoopStrengthReduce/Power/memory-intrinsic.ll diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index afe963234174e..d89a8784edd22 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1203,3 +1203,45 @@ bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, else return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); } + +bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, + MemIntrinsicInfo &Info) { + switch (Inst->getIntrinsicID()) { + case Intrinsic::ppc_altivec_lvx: + case Intrinsic::ppc_altivec_lvxl: + case Intrinsic::ppc_altivec_lvebx: + case Intrinsic::ppc_altivec_lvehx: + case Intrinsic::ppc_altivec_lvewx: + case Intrinsic::ppc_vsx_lxvd2x: + case Intrinsic::ppc_vsx_lxvw4x: + case Intrinsic::ppc_vsx_lxvd2x_be: + case Intrinsic::ppc_vsx_lxvw4x_be: + case Intrinsic::ppc_vsx_lxvl: + case Intrinsic::ppc_vsx_lxvll: { + Info.PtrVal = Inst->getArgOperand(0); + Info.ReadMem = true; + Info.WriteMem = false; + return true; + } + case Intrinsic::ppc_altivec_stvx: + case Intrinsic::ppc_altivec_stvxl: + case Intrinsic::ppc_altivec_stvebx: + case Intrinsic::ppc_altivec_stvehx: + case Intrinsic::ppc_altivec_stvewx: + case Intrinsic::ppc_vsx_stxvd2x: + case Intrinsic::ppc_vsx_stxvw4x: + case Intrinsic::ppc_vsx_stxvd2x_be: + case Intrinsic::ppc_vsx_stxvw4x_be: + case Intrinsic::ppc_vsx_stxvl: + case Intrinsic::ppc_vsx_stxvll: { + Info.PtrVal = Inst->getArgOperand(1); + Info.ReadMem = false; + Info.WriteMem = true; + return true; + } + default: + break; + } + + return false; +} diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index a6dc96d42012d..ede5d61fb7a38 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -68,6 +68,7 @@ class PPCTTIImpl : public BasicTTIImplBase { bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo); + bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, diff --git a/llvm/test/Transforms/LoopStrengthReduce/Power/memory-intrinsic.ll b/llvm/test/Transforms/LoopStrengthReduce/Power/memory-intrinsic.ll new file mode 100644 index 0000000000000..82e3ab761fb3d --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/Power/memory-intrinsic.ll @@ -0,0 +1,38 @@ +; REQUIRES: asserts +; RUN: opt -loop-reduce -debug-only=loop-reduce -S < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; CHECK: LSR Use: Kind=Address +; CHECK: LSR Use: Kind=Address +; CHECK-NOT: LSR Use: Kind=Basic +; CHECK-NOT: LSR Use: Kind=Basic + +declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) +declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) + +; Function Attrs: nofree norecurse nounwind +define void @foo(<4 x i32>* %0, <4 x i32>* %1, i32 signext %2) { + %4 = icmp sgt i32 %2, 0 + br i1 
%4, label %5, label %7

5:                                                ; preds = %3
  %6 = zext i32 %2 to i64
  br label %8

7:                                                ; preds = %8, %3
  ret void

8:                                                ; preds = %5, %8
  %9 = phi i64 [ 0, %5 ], [ %15, %8 ]
  %10 = getelementptr inbounds <4 x i32>, <4 x i32>* %1, i64 %9
  %11 = bitcast <4 x i32>* %10 to i8*
  %12 = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %11)
  %13 = getelementptr inbounds <4 x i32>, <4 x i32>* %0, i64 %9
  %14 = bitcast <4 x i32>* %13 to i8*
  call void @llvm.ppc.altivec.stvx(<4 x i32> %12, i8* %14)
  %15 = add nuw nsw i64 %9, 10
  %16 = icmp ult i64 %15, %6
  br i1 %16, label %8, label %7
}

From ed46e84c7aaffd847656ac559acb06089096ec33 Mon Sep 17 00:00:00 2001
From: Chen Zheng
Date: Wed, 7 Oct 2020 00:12:21 -0400
Subject: [PATCH 197/321] [MachineInstr] exclude call instruction in mayAlias

We currently get a noAlias result if mayAlias is queried with a call
instruction and another load/store/call instruction. This is not right: a
call instruction is not marked mayLoad or mayStore, but it may still alter
memory. This patch fixes this wrong alias query.

Differential Revision: https://reviews.llvm.org/D87490
---
 llvm/lib/CodeGen/MachineInstr.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index ebae5eb380de8..c5c3f8c1186ee 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1236,6 +1236,11 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   const MachineFrameInfo &MFI = MF->getFrameInfo();

+  // Exclude call instructions, which may alter memory but cannot be handled
+  // by this function.
+  if (isCall() || Other.isCall())
+    return true;
+
   // If neither instruction stores to memory, they can't alias in any
   // meaningful way, even if they read from the same address.
   if (!mayStore() && !Other.mayStore())

From 5a3f6bfe8a71b94728597aee12a4c36949d5f6af Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Tue, 29 Sep 2020 08:08:02 -0500
Subject: [PATCH 198/321] Reapply "[OpenMP][FIX] Verify compatible types for
 declare variant calls" D88384

This reapplies D88384 with the minor modification that an assertion in
ASTContext::mergeTypes was changed to a regular conditional with a
graceful exit.

---
 clang/lib/AST/ASTContext.cpp                  |   4 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |  18 +-
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |   6 +
 ...penmp-begin-declare-variant_template_3.cpp | 258 ++++++++++++++++++
 4 files changed, 281 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 376a0b044010a..a82d95461bb98 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -9427,8 +9427,8 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS,
   // designates the object or function denoted by the reference, and the
   // expression is an lvalue unless the reference is an rvalue reference and
   // the expression is a function call (possibly inside parentheses).
-  assert(!LHS->getAs<ReferenceType>() && "LHS is a reference type?");
-  assert(!RHS->getAs<ReferenceType>() && "RHS is a reference type?");
+  if (LHS->getAs<ReferenceType>() || RHS->getAs<ReferenceType>())
+    return {};

   if (Unqualified) {
     LHS = LHS.getUnqualifiedType();
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 91897cbe25b43..d0b5a0d03e2d0 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -5921,7 +5921,6 @@ void Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(
       continue;

     QualType UDeclTy = UDecl->getType();
-    // TODO: Verify types for templates eventually.
     if (!UDeclTy->isDependentType()) {
       QualType NewType = Context.mergeFunctionTypes(
           FType, UDeclTy, /* OfBlockPointer */ false,
@@ -6009,6 +6008,8 @@ ExprResult Sema::ActOnOpenMPCall(ExprResult Call, Scope *Scope,
   TargetOMPContext OMPCtx(Context, std::move(DiagUnknownTrait),
                           getCurFunctionDecl());

+  QualType CalleeFnType = CalleeFnDecl->getType();
+
   SmallVector Exprs;
   SmallVector VMIs;
   while (CalleeFnDecl) {
@@ -6061,8 +6062,19 @@
     }
     NewCall = BuildCallExpr(Scope, BestExpr, LParenLoc, ArgExprs, RParenLoc,
                             ExecConfig);
-    if (NewCall.isUsable())
-      break;
+    if (NewCall.isUsable()) {
+      if (CallExpr *NCE = dyn_cast<CallExpr>(NewCall.get())) {
+        FunctionDecl *NewCalleeFnDecl = NCE->getDirectCallee();
+        QualType NewType = Context.mergeFunctionTypes(
+            CalleeFnType, NewCalleeFnDecl->getType(),
+            /* OfBlockPointer */ false,
+            /* Unqualified */ false, /* AllowCXX */ true);
+        if (!NewType.isNull())
+          break;
+        // Don't use the call if the function type was not compatible.
+        NewCall = nullptr;
+      }
+    }
   }

   VMIs.erase(VMIs.begin() + BestIdx);
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index cbbb44b82adce..9420bd04b7a97 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -444,6 +444,12 @@ static void instantiateOMPDeclareVariantAttr(
                       New->getLocation());
   if (!SubstFD)
     return;
+  QualType NewType = S.Context.mergeFunctionTypes(
+      SubstFD->getType(), FD->getType(),
+      /* OfBlockPointer */ false,
+      /* Unqualified */ false, /* AllowCXX */ true);
+  if (NewType.isNull())
+    return;
   S.InstantiateFunctionDefinition(
       New->getLocation(), SubstFD, /* Recursive */ true,
       /* DefinitionRequired */ false, /* AtEndOfTU */ false);
diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp
new file mode 100644
index 0000000000000..153764490c0dd
--- /dev/null
+++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp
@@ -0,0 +1,258 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -verify -ast-dump %s -x c++| FileCheck %s
+// expected-no-diagnostics
+// PR47655
+
+template <typename T> struct S {
+  S(int, T *) {}
+};
+
+template <typename T>
+int also_before(T s) {
+  return 0;
+}
+
+#pragma omp begin declare variant match(implementation = {extension(allow_templates)})
+template <typename T>
+int also_before(S<T> s) {
+  // Ensure there is no error because this is never instantiated.
+  double t;
+  S<T> q(1, &t);
+  return 1;
+}
+template <typename T>
+int special(S<T> s) {
+  T t;
+  S<T> q(0, &t);
+  return 0;
+}
+template <typename T>
+int also_after(S<T> s) {
+  // Ensure there is no error because this is never instantiated.
+  double t;
+  S<T> q(2.0, &t);
+  return 2;
+}
+#pragma omp end declare variant
+
+template <typename T>
+int also_after(T s) {
+  return 0;
+}
+
+int test() {
+  // Should return 0.
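+  // The S<T> variants above have parameter types incompatible with these
+  // scalar calls, so (with this patch) they are gracefully skipped rather
+  // than merged; also_before/also_after bind to the base versions, and
+  // special, which only exists in the variant region, returns 0 as well.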
+ return also_before(0) + also_after(0) + also_before(0.) + also_after(0.) + special(S(0, 0)); +} + +// CHECK: |-ClassTemplateDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:5:30 S +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_1:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-CXXRecordDecl [[ADDR_2:0x[a-z0-9]*]] line:5:30 struct S definition +// CHECK-NEXT: | | |-DefinitionData empty standard_layout trivially_copyable has_user_declared_ctor can_const_default_init +// CHECK-NEXT: | | | |-DefaultConstructor defaulted_is_constexpr +// CHECK-NEXT: | | | |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param +// CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit +// CHECK-NEXT: | | | |-CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param +// CHECK-NEXT: | | | |-MoveAssignment exists simple trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | |-CXXRecordDecl [[ADDR_3:0x[a-z0-9]*]] col:30 implicit referenced struct S +// CHECK-NEXT: | | `-CXXConstructorDecl [[ADDR_4:0x[a-z0-9]*]] col:3 S 'void (int, T *)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_5:0x[a-z0-9]*]] col:8 'int' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_6:0x[a-z0-9]*]] col:13 'T *' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_7:0x[a-z0-9]*]] +// CHECK-NEXT: | |-ClassTemplateSpecializationDecl [[ADDR_8:0x[a-z0-9]*]] line:5:30 struct S definition +// CHECK-NEXT: | | |-DefinitionData pass_in_registers empty standard_layout trivially_copyable has_user_declared_ctor can_const_default_init +// CHECK-NEXT: | | | |-DefaultConstructor defaulted_is_constexpr +// CHECK-NEXT: | | | |-CopyConstructor simple trivial has_const_param implicit_has_const_param +// CHECK-NEXT: | | | |-MoveConstructor exists simple trivial +// CHECK-NEXT: | | | |-CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param +// CHECK-NEXT: | | | |-MoveAssignment exists simple trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial +// CHECK-NEXT: | | |-TemplateArgument type 'int' +// CHECK-NEXT: | | | `-BuiltinType [[ADDR_9:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-CXXRecordDecl [[ADDR_10:0x[a-z0-9]*]] prev [[ADDR_8]] col:30 implicit struct S +// CHECK-NEXT: | | |-CXXConstructorDecl [[ADDR_11:0x[a-z0-9]*]] col:3 used S 'void (int, int *)' +// CHECK-NEXT: | | | |-ParmVarDecl [[ADDR_12:0x[a-z0-9]*]] col:8 'int' +// CHECK-NEXT: | | | |-ParmVarDecl [[ADDR_13:0x[a-z0-9]*]] col:13 'int *' +// CHECK-NEXT: | | | `-CompoundStmt [[ADDR_7]] +// CHECK-NEXT: | | |-CXXConstructorDecl [[ADDR_14:0x[a-z0-9]*]] col:30 implicit constexpr S 'void (const S &)' inline default trivial noexcept-unevaluated [[ADDR_14]] +// CHECK-NEXT: | | | `-ParmVarDecl [[ADDR_15:0x[a-z0-9]*]] col:30 'const S &' +// CHECK-NEXT: | | |-CXXConstructorDecl [[ADDR_16:0x[a-z0-9]*]] col:30 implicit used constexpr S 'void (S &&) noexcept' inline default trivial +// CHECK-NEXT: | | | |-ParmVarDecl [[ADDR_17:0x[a-z0-9]*]] col:30 'S &&' +// CHECK-NEXT: | | | `-CompoundStmt [[ADDR_18:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-CXXDestructorDecl [[ADDR_19:0x[a-z0-9]*]] col:30 implicit referenced ~S 'void ({{.*}}) noexcept' inline default trivial +// CHECK-NEXT: | `-ClassTemplateSpecializationDecl [[ADDR_20:0x[a-z0-9]*]] line:5:30 struct S +// CHECK-NEXT: | `-TemplateArgument type 'double' +// CHECK-NEXT: | `-BuiltinType [[ADDR_21:0x[a-z0-9]*]] 'double' +// CHECK-NEXT: |-FunctionTemplateDecl 
[[ADDR_22:0x[a-z0-9]*]] line:10:5 also_before +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_23:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_24:0x[a-z0-9]*]] line:10:5 also_before 'int (T)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_25:0x[a-z0-9]*]] col:19 s 'T' +// CHECK-NEXT: | | |-CompoundStmt [[ADDR_26:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-ReturnStmt [[ADDR_27:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_28:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | | `-OMPDeclareVariantAttr [[ADDR_29:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_30:0x[a-z0-9]*]] 'int (S)' {{.*}}Function [[ADDR_31:0x[a-z0-9]*]] 'also_before[implementation={extension(allow_templates)}]' 'int (S)' +// CHECK-NEXT: | |-FunctionDecl [[ADDR_32:0x[a-z0-9]*]] line:10:5 used also_before 'int (int)' +// CHECK-NEXT: | | |-TemplateArgument type 'int' +// CHECK-NEXT: | | | `-BuiltinType [[ADDR_9]] 'int' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_33:0x[a-z0-9]*]] col:19 s 'int':'int' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_34:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_35:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_28]] 'int' 0 +// CHECK-NEXT: | `-FunctionDecl [[ADDR_36:0x[a-z0-9]*]] line:10:5 used also_before 'int (double)' +// CHECK-NEXT: | |-TemplateArgument type 'double' +// CHECK-NEXT: | | `-BuiltinType [[ADDR_21]] 'double' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_37:0x[a-z0-9]*]] col:19 s 'double':'double' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_38:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_39:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_28]] 'int' 0 +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_40:0x[a-z0-9]*]] line:16:1 also_before[implementation={extension(allow_templates)}] +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_41:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_31]] line:16:1 referenced also_before[implementation={extension(allow_templates)}] 'int (S)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_42:0x[a-z0-9]*]] col:22 s 'S' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_43:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_44:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_45:0x[a-z0-9]*]] col:10 referenced t 'double' +// CHECK-NEXT: | | |-DeclStmt [[ADDR_46:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_47:0x[a-z0-9]*]] col:8 q 'S' callinit +// CHECK-NEXT: | | | `-ParenListExpr [[ADDR_48:0x[a-z0-9]*]] 'NULL TYPE' +// CHECK-NEXT: | | | |-IntegerLiteral [[ADDR_49:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | `-UnaryOperator [[ADDR_50:0x[a-z0-9]*]] 'double *' prefix '&' cannot overflow +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_51:0x[a-z0-9]*]] 'double' {{.*}}Var [[ADDR_45]] 't' 'double' +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_52:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_53:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | |-FunctionDecl [[ADDR_54:0x[a-z0-9]*]] line:16:1 also_before[implementation={extension(allow_templates)}] 'int (S)' +// CHECK-NEXT: | | |-TemplateArgument type 'int' +// CHECK-NEXT: | | | `-BuiltinType [[ADDR_9]] 'int' +// CHECK-NEXT: | | `-ParmVarDecl [[ADDR_55:0x[a-z0-9]*]] col:22 s 'S':'S' +// CHECK-NEXT: | `-FunctionDecl [[ADDR_56:0x[a-z0-9]*]] line:16:1 also_before[implementation={extension(allow_templates)}] 'int (S)' +// CHECK-NEXT: | |-TemplateArgument type 'double' +// CHECK-NEXT: | | `-BuiltinType [[ADDR_21]] 'double' +// CHECK-NEXT: | `-ParmVarDecl 
[[ADDR_57:0x[a-z0-9]*]] col:22 s 'S':'S' +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_58:0x[a-z0-9]*]] col:5 implicit special +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_59:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_60:0x[a-z0-9]*]] col:5 special 'int (S)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_61:0x[a-z0-9]*]] col:18 s 'S' +// CHECK-NEXT: | | `-OMPDeclareVariantAttr [[ADDR_62:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_63:0x[a-z0-9]*]] 'int (S)' {{.*}}Function [[ADDR_64:0x[a-z0-9]*]] 'special[implementation={extension(allow_templates)}]' 'int (S)' +// CHECK-NEXT: | `-FunctionDecl [[ADDR_65:0x[a-z0-9]*]] col:5 used special 'int (S)' +// CHECK-NEXT: | |-TemplateArgument type 'int' +// CHECK-NEXT: | | `-BuiltinType [[ADDR_9]] 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_66:0x[a-z0-9]*]] col:18 s 'S':'S' +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_67:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_68:0x[a-z0-9]*]] 'int (S)' {{.*}}Function [[ADDR_69:0x[a-z0-9]*]] 'special[implementation={extension(allow_templates)}]' 'int (S)' +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_70:0x[a-z0-9]*]] line:23:1 special[implementation={extension(allow_templates)}] +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_59]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_64]] line:23:1 referenced special[implementation={extension(allow_templates)}] 'int (S)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_61]] col:18 s 'S' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_71:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_72:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_73:0x[a-z0-9]*]] col:5 referenced t 'T' +// CHECK-NEXT: | | |-DeclStmt [[ADDR_74:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_75:0x[a-z0-9]*]] col:8 q 'S' callinit +// CHECK-NEXT: | | | `-ParenListExpr [[ADDR_76:0x[a-z0-9]*]] 'NULL TYPE' +// CHECK-NEXT: | | | |-IntegerLiteral [[ADDR_77:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | | | `-UnaryOperator [[ADDR_78:0x[a-z0-9]*]] '' prefix '&' cannot overflow +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_79:0x[a-z0-9]*]] 'T' {{.*}}Var [[ADDR_73]] 't' 'T' +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_80:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_81:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | `-FunctionDecl [[ADDR_69]] line:23:1 special[implementation={extension(allow_templates)}] 'int (S)' +// CHECK-NEXT: | |-TemplateArgument type 'int' +// CHECK-NEXT: | | `-BuiltinType [[ADDR_9]] 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_82:0x[a-z0-9]*]] col:18 s 'S':'S' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_83:0x[a-z0-9]*]] +// CHECK-NEXT: | |-DeclStmt [[ADDR_84:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-VarDecl [[ADDR_85:0x[a-z0-9]*]] col:5 used t 'int':'int' +// CHECK-NEXT: | |-DeclStmt [[ADDR_86:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-VarDecl [[ADDR_87:0x[a-z0-9]*]] col:8 q 'S':'S' callinit +// CHECK-NEXT: | | `-CXXConstructExpr [[ADDR_88:0x[a-z0-9]*]] 'S':'S' 'void (int, int *)' +// CHECK-NEXT: | | |-IntegerLiteral [[ADDR_77]] 'int' 0 +// CHECK-NEXT: | | `-UnaryOperator [[ADDR_89:0x[a-z0-9]*]] 'int *' prefix '&' cannot overflow +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_90:0x[a-z0-9]*]] 'int':'int' {{.*}}Var [[ADDR_85]] 't' 'int':'int' +// CHECK-NEXT: | `-ReturnStmt [[ADDR_91:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_81]] 'int' 0 +// CHECK-NEXT: |-FunctionTemplateDecl 
[[ADDR_92:0x[a-z0-9]*]] col:5 implicit also_after +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_93:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | `-FunctionDecl [[ADDR_94:0x[a-z0-9]*]] col:5 also_after 'int (S)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_95:0x[a-z0-9]*]] col:21 s 'S' +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_96:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_97:0x[a-z0-9]*]] 'int (S)' {{.*}}Function [[ADDR_98:0x[a-z0-9]*]] 'also_after[implementation={extension(allow_templates)}]' 'int (S)' +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_99:0x[a-z0-9]*]] line:29:1 also_after[implementation={extension(allow_templates)}] +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_93]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | `-FunctionDecl [[ADDR_98]] line:29:1 also_after[implementation={extension(allow_templates)}] 'int (S)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_95]] col:21 s 'S' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_100:0x[a-z0-9]*]] +// CHECK-NEXT: | |-DeclStmt [[ADDR_101:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-VarDecl [[ADDR_102:0x[a-z0-9]*]] col:10 referenced t 'double' +// CHECK-NEXT: | |-DeclStmt [[ADDR_103:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-VarDecl [[ADDR_104:0x[a-z0-9]*]] col:8 q 'S' callinit +// CHECK-NEXT: | | `-ParenListExpr [[ADDR_105:0x[a-z0-9]*]] 'NULL TYPE' +// CHECK-NEXT: | | |-FloatingLiteral [[ADDR_106:0x[a-z0-9]*]] 'double' 2.000000e+00 +// CHECK-NEXT: | | `-UnaryOperator [[ADDR_107:0x[a-z0-9]*]] 'double *' prefix '&' cannot overflow +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_108:0x[a-z0-9]*]] 'double' {{.*}}Var [[ADDR_102]] 't' 'double' +// CHECK-NEXT: | `-ReturnStmt [[ADDR_109:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_110:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_111:0x[a-z0-9]*]] line:38:5 also_after +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_112:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_113:0x[a-z0-9]*]] line:38:5 also_after 'int (T)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_114:0x[a-z0-9]*]] col:18 s 'T' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_115:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_116:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_117:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | |-FunctionDecl [[ADDR_118:0x[a-z0-9]*]] line:38:5 used also_after 'int (int)' +// CHECK-NEXT: | | |-TemplateArgument type 'int' +// CHECK-NEXT: | | | `-BuiltinType [[ADDR_9]] 'int' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_119:0x[a-z0-9]*]] col:18 s 'int':'int' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_120:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_121:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_117]] 'int' 0 +// CHECK-NEXT: | `-FunctionDecl [[ADDR_122:0x[a-z0-9]*]] line:38:5 used also_after 'int (double)' +// CHECK-NEXT: | |-TemplateArgument type 'double' +// CHECK-NEXT: | | `-BuiltinType [[ADDR_21]] 'double' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_123:0x[a-z0-9]*]] col:18 s 'double':'double' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_124:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_125:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_117]] 'int' 0 +// CHECK-NEXT: `-FunctionDecl [[ADDR_126:0x[a-z0-9]*]] line:42:5 test 'int ({{.*}})' +// CHECK-NEXT: `-CompoundStmt [[ADDR_127:0x[a-z0-9]*]] +// CHECK-NEXT: `-ReturnStmt [[ADDR_128:0x[a-z0-9]*]] +// CHECK-NEXT: `-ExprWithCleanups [[ADDR_129:0x[a-z0-9]*]] 'int' +// 
CHECK-NEXT: `-BinaryOperator [[ADDR_130:0x[a-z0-9]*]] 'int' '+' +// CHECK-NEXT: |-BinaryOperator [[ADDR_131:0x[a-z0-9]*]] 'int' '+' +// CHECK-NEXT: | |-BinaryOperator [[ADDR_132:0x[a-z0-9]*]] 'int' '+' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_133:0x[a-z0-9]*]] 'int' '+' +// CHECK-NEXT: | | | |-CallExpr [[ADDR_134:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_135:0x[a-z0-9]*]] 'int (*)(int)' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_136:0x[a-z0-9]*]] 'int (int)' {{.*}}Function [[ADDR_32]] 'also_before' 'int (int)' (FunctionTemplate [[ADDR_22]] 'also_before') +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_137:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | | | `-CallExpr [[ADDR_138:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_139:0x[a-z0-9]*]] 'int (*)(int)' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_140:0x[a-z0-9]*]] 'int (int)' {{.*}}Function [[ADDR_118]] 'also_after' 'int (int)' (FunctionTemplate [[ADDR_111]] 'also_after') +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_141:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | | `-CallExpr [[ADDR_142:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-ImplicitCastExpr [[ADDR_143:0x[a-z0-9]*]] 'int (*)(double)' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_144:0x[a-z0-9]*]] 'int (double)' {{.*}}Function [[ADDR_36]] 'also_before' 'int (double)' (FunctionTemplate [[ADDR_22]] 'also_before') +// CHECK-NEXT: | | `-FloatingLiteral [[ADDR_145:0x[a-z0-9]*]] 'double' 0.000000e+00 +// CHECK-NEXT: | `-CallExpr [[ADDR_146:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-ImplicitCastExpr [[ADDR_147:0x[a-z0-9]*]] 'int (*)(double)' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_148:0x[a-z0-9]*]] 'int (double)' {{.*}}Function [[ADDR_122]] 'also_after' 'int (double)' (FunctionTemplate [[ADDR_111]] 'also_after') +// CHECK-NEXT: | `-FloatingLiteral [[ADDR_149:0x[a-z0-9]*]] 'double' 0.000000e+00 +// CHECK-NEXT: `-PseudoObjectExpr [[ADDR_150:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: |-CallExpr [[ADDR_151:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-ImplicitCastExpr [[ADDR_152:0x[a-z0-9]*]] 'int (*)(S)' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_153:0x[a-z0-9]*]] 'int (S)' {{.*}}Function [[ADDR_65]] 'special' 'int (S)' (FunctionTemplate [[ADDR_58]] 'special') +// CHECK-NEXT: | `-CXXConstructExpr [[ADDR_154:0x[a-z0-9]*]] 'S':'S' 'void (S &&) noexcept' elidable +// CHECK-NEXT: | `-MaterializeTemporaryExpr [[ADDR_155:0x[a-z0-9]*]] 'S':'S' xvalue +// CHECK-NEXT: | `-CXXTemporaryObjectExpr [[ADDR_156:0x[a-z0-9]*]] 'S':'S' 'void (int, int *)' +// CHECK-NEXT: | |-IntegerLiteral [[ADDR_157:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_158:0x[a-z0-9]*]] 'int *' +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_159:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: `-CallExpr [[ADDR_160:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: |-ImplicitCastExpr [[ADDR_161:0x[a-z0-9]*]] 'int (*)(S)' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_68]] 'int (S)' {{.*}}Function [[ADDR_69]] 'special[implementation={extension(allow_templates)}]' 'int (S)' +// CHECK-NEXT: `-CXXConstructExpr [[ADDR_162:0x[a-z0-9]*]] 'S':'S' 'void (S &&) noexcept' elidable +// CHECK-NEXT: `-MaterializeTemporaryExpr [[ADDR_163:0x[a-z0-9]*]] 'S':'S' xvalue +// CHECK-NEXT: `-CXXTemporaryObjectExpr [[ADDR_156]] 'S':'S' 'void (int, int *)' +// CHECK-NEXT: |-IntegerLiteral [[ADDR_157]] 'int' 0 +// CHECK-NEXT: `-ImplicitCastExpr [[ADDR_158]] 'int *' +// CHECK-NEXT: `-IntegerLiteral [[ADDR_159]] 'int' 0 From 0c009e092e29a3dff16c5c0522979341fab3be62 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 7 Oct 2020 12:18:24 +0700 Subject: 
[PATCH 199/321] [Test] Add test showing that we can avoid inserting
 trunc/zext

---
 .../IndVarSimplify/widen-loop-comp.ll | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index d741514fa4fac..da2a2e5393798 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -544,3 +544,55 @@ define i32 @test10(i32 %v) {
 leave:
   ret i32 22
 }
+
+; TODO: We don't really need trunc/zext here because when iv.next overflows,
+; its value is not used.
+define i32 @test11(i32 %start, i32* %p, i32* %q) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i32 [[TMP1]], -1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
+; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]]
+; CHECK:       backedge:
+; CHECK-NEXT:    [[INDEX:%.*]] = zext i32 [[IV_NEXT]] to i64
+; CHECK-NEXT:    [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store i32 1, i32* [[STORE_ADDR]], align 4
+; CHECK-NEXT:    [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[STOP:%.*]] = load i32, i32* [[Q]], align 4
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       failure:
+; CHECK-NEXT:    unreachable
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [%start, %entry], [%iv.next, %backedge]
+  %iv.next = add i32 %iv, -1
+  %cond = icmp eq i32 %iv, 0
+  br i1 %cond, label %exit, label %backedge
+
+backedge:
+  %index = zext i32 %iv.next to i64
+  %store.addr = getelementptr i32, i32* %p, i64 %index
+  store i32 1, i32* %store.addr
+  %load.addr = getelementptr i32, i32* %q, i64 %index
+  %stop = load i32, i32* %q
+  %loop.cond = icmp eq i32 %stop, 0
+  br i1 %loop.cond, label %loop, label %failure
+
+exit:
+  ret i32 0
+
+failure:
+  unreachable
+}

From edd71db38b0c2292e6a36fb789a3ec3cbde6a023 Mon Sep 17 00:00:00 2001
From: Yonghong Song
Date: Tue, 6 Oct 2020 20:29:09 -0700
Subject: [PATCH 200/321] BPF: avoid duplicated globals for CORE relocations

This patch fixes two issues related to relocation globals.

In LLVM, if a global, e.g. one named "g", is created and conflicts with
another global of the same name, LLVM will rename the new global, e.g.,
to "g.2". Since a relocation global's name has special meaning, we do
not want LLVM to change it, so internally we have logic to check whether
such duplication happens; if it does, we just reuse the previous global.

The first bug is related to non-btf-id relocation
(BPFAbstractMemberAccess.cpp). Commit 54d9f743c8b0 ("BPF: move
AbstractMemberAccess and PreserveDIType passes to EP_EarlyAsPossible")
changed the pass from a ModulePass to a FunctionPass, i.e., it now
handles one function at a time. But since still only one
BPFAbstractMemberAccess object was created, module-level de-duplication
remained possible.
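The fix below makes that global cache a class static. A rough sketch of
the pattern, with simplified, hypothetical types (the real map stores
GlobalVariable pointers keyed by the relocation global's name):

  // Hedged sketch, not the patch itself: one static map shared by every
  // pass instance gives FunctionPass-scoped code module-level memory.
  #include <map>
  #include <string>

  struct RelocGlobalBuilder {
    // Shared across all RelocGlobalBuilder instances in one compilation.
    static std::map<std::string, std::string> GEPGlobals;

    const std::string &getOrCreate(const std::string &Name) {
      auto It = GEPGlobals.find(Name);
      if (It == GEPGlobals.end())
        It = GEPGlobals.emplace(Name, Name).first; // stand-in for creating it
      return It->second; // reusing the entry keeps the special name intact
    }
  };
  std::map<std::string, std::string> RelocGlobalBuilder::GEPGlobals;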
Commit 40251fee0084 ("[BPF][NewPM] Make BPFTargetMachine properly adjust
NPM optimizer pipeline") made a change to create a
BPFAbstractMemberAccess object per function, so module-level
de-duplication is no longer possible without going through all module
globals. This patch simply makes the map which holds the reloc globals a
class static, so it is available to the BPFAbstractMemberAccess objects
of all functions.

The second bug is related to btf-id relocation (BPFPreserveDIType.cpp).
Before Commit 54d9f743c8b0, the pass was a ModulePass, so a local count
variable, incremented for each instance, worked fine. But after Commit
54d9f743c8b0, the pass became a FunctionPass, and a local variable no
longer works properly since every function starts with the same initial
value. Fix the issue by changing the local count variable to a static,
so it is truly unique across the whole module compilation.

Differential Revision: https://reviews.llvm.org/D88942
---
 .../Target/BPF/BPFAbstractMemberAccess.cpp    |   4 +-
 llvm/lib/Target/BPF/BPFPreserveDIType.cpp     |   2 +-
 .../test/CodeGen/BPF/CORE/btf-id-duplicate.ll |  97 ++++++++++++++++
 .../CodeGen/BPF/CORE/field-reloc-duplicate.ll | 106 ++++++++++++++++++
 4 files changed, 207 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/BPF/CORE/btf-id-duplicate.ll
 create mode 100644 llvm/test/CodeGen/BPF/CORE/field-reloc-duplicate.ll

diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index d04b0644ac497..cd994a9c83658 100644
--- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -141,7 +141,7 @@ class BPFAbstractMemberAccess final {
   const DataLayout *DL = nullptr;
   Module *M = nullptr;

-  std::map<std::string, GlobalVariable *> GEPGlobals;
+  static std::map<std::string, GlobalVariable *> GEPGlobals;
   // A map to link preserve_*_access_index instrinsic calls.
   std::map<CallInst *, std::pair<CallInst *, CallInfo>> AIChain;
   // A map to hold all the base preserve_*_access_index instrinsic calls.
@@ -179,6 +179,8 @@ class BPFAbstractMemberAccess final {
   bool transformGEPChain(CallInst *Call, CallInfo &CInfo);
 };

+std::map<std::string, GlobalVariable *> BPFAbstractMemberAccess::GEPGlobals;
+
 class BPFAbstractMemberAccessLegacyPass final : public FunctionPass {
   BPFTargetMachine *TM;

diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
index d2e969780786f..75febbe4b1380 100644
--- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
+++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
@@ -68,7 +68,7 @@ static bool BPFPreserveDITypeImpl(Function &F) {
     return false;

   std::string BaseName = "llvm.btf_type_id.";
-  int Count = 0;
+  static int Count = 0;
   for (auto Call : PreserveDITypeCalls) {
     const ConstantInt *Flag = dyn_cast<ConstantInt>(Call->getArgOperand(1));
     assert(Flag);

diff --git a/llvm/test/CodeGen/BPF/CORE/btf-id-duplicate.ll b/llvm/test/CodeGen/BPF/CORE/btf-id-duplicate.ll
new file mode 100644
index 0000000000000..070ac3f51763b
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/CORE/btf-id-duplicate.ll
@@ -0,0 +1,97 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux %s | llvm-dis > %t1
+; RUN: llc -o - %t1 | FileCheck %s
+; RUN: opt -passes='default<O2>' -mtriple=bpf-pc-linux %s | llvm-dis > %t1
+; RUN: llc -o - %t1 | FileCheck %s
+;
+; Source:
+;   struct s1 { int a; int b; };
+;   int foo(struct s1 *arg) { return __builtin_btf_type_id(*arg, 0); }
+;   int bar(struct s1 *arg) { return __builtin_btf_type_id(*arg, 0); }
+; Compilation flag:
+;   clang -target bpf -O2 -S -emit-llvm -g -Xclang -disable-llvm-passes test.c
+
+%struct.s1 = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local i32 @foo(%struct.s1* %arg) #0 !dbg !7 {
+entry:
+  %arg.addr = alloca %struct.s1*, align 8
+  store %struct.s1* %arg, %struct.s1** %arg.addr, align 8, !tbaa !18
+  call void @llvm.dbg.declare(metadata %struct.s1** %arg.addr, metadata !17, metadata !DIExpression()), !dbg !22
+  %0 = call i32 @llvm.bpf.btf.type.id(i32 0, i64 0), !dbg !23, !llvm.preserve.access.index !12
+  ret i32 %0, !dbg !24
+}
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.bpf.btf.type.id(i32, i64) #2
+
+; Function Attrs: nounwind
+define dso_local i32 @bar(%struct.s1* %arg) #0 !dbg !25 {
+entry:
+  %arg.addr = alloca %struct.s1*, align 8
+  store %struct.s1* %arg, %struct.s1** %arg.addr, align 8, !tbaa !18
+  call void @llvm.dbg.declare(metadata %struct.s1** %arg.addr, metadata !27, metadata !DIExpression()), !dbg !28
+  %0 = call i32 @llvm.bpf.btf.type.id(i32 1, i64 0), !dbg !29, !llvm.preserve.access.index !12
+  ret i32 %0, !dbg !30
+}
+
+; CHECK:             .long   1                       # BTF_KIND_STRUCT(id = 2)
+
+; CHECK:             .ascii  "s1"                    # string offset=1
+; CHECK:             .ascii  ".text"                 # string offset=20
+; CHECK:             .byte   48                      # string offset=26
+
+; CHECK:             .long   16                      # FieldReloc
+; CHECK-NEXT:        .long   20                      # Field reloc section string offset=20
+; CHECK-NEXT:        .long   2
+; CHECK-NEXT:        .long   .Ltmp{{[0-9]+}}
+; CHECK-NEXT:        .long   2
+; CHECK-NEXT:        .long   26
+; CHECK-NEXT:        .long   6
+; CHECK-NEXT:        .long   .Ltmp{{[0-9]+}}
+; CHECK-NEXT:        .long   2
+; CHECK-NEXT:        .long   26
+; CHECK-NEXT:        .long   6
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable willreturn }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project.git 80a3f7beebd8caab358ff063526ae2d26467c029)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/home/yhs/work/tests/dup")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git 80a3f7beebd8caab358ff063526ae2d26467c029)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !11}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64)
+!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s1", file: !1, line: 1, size: 64, elements: !13)
+!13 = !{!14, !15}
+!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !1, line: 1, baseType: !10, size: 32)
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !12, file: !1, line: 1, baseType: !10, size: 32, offset: 32)
+!16 = !{!17}
+!17 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 2, type: !11)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"any pointer", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 2, column: 20, scope: !7)
+!23 = !DILocation(line: 2, column: 34, scope: !7)
+!24 = !DILocation(line: 2, column: 27, scope: !7)
+!25 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !26)
+!26 = !{!27}
+!27 = !DILocalVariable(name: "arg", arg: 1, scope: !25, file: !1, line: 3, type: !11)
+!28 = !DILocation(line: 3, column: 20, scope: !25)
+!29 = !DILocation(line: 3, column: 34, scope: !25)
+!30 = !DILocation(line: 3, column: 27, scope: !25)

diff --git a/llvm/test/CodeGen/BPF/CORE/field-reloc-duplicate.ll b/llvm/test/CodeGen/BPF/CORE/field-reloc-duplicate.ll
new file mode 100644
index 0000000000000..b83f45a6b91a1
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/CORE/field-reloc-duplicate.ll
@@ -0,0 +1,106 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux %s | llvm-dis > %t1
+; RUN: llc -o - %t1 | FileCheck %s
+; RUN: opt -passes='default<O2>' -mtriple=bpf-pc-linux %s | llvm-dis > %t1
+; RUN: llc -o - %t1 | FileCheck %s
+;
+; Source:
+;   struct s1 { int a; int b; } __attribute__((preserve_access_index));
+;   int foo(struct s1 *arg) { return arg->a; }
+;   int bar(struct s1 *arg) { return arg->a; }
+; Compilation flag:
+;   clang -target bpf -O2 -S -emit-llvm -g -Xclang -disable-llvm-passes test.c
+
+%struct.s1 = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local i32 @foo(%struct.s1* %arg) #0 !dbg !7 {
+entry:
+  %arg.addr = alloca %struct.s1*, align 8
+  store %struct.s1* %arg, %struct.s1** %arg.addr, align 8, !tbaa !18
+  call void @llvm.dbg.declare(metadata %struct.s1** %arg.addr, metadata !17, metadata !DIExpression()), !dbg !22
+  %0 = load %struct.s1*,
%struct.s1** %arg.addr, align 8, !dbg !23, !tbaa !18 + %1 = call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s1s(%struct.s1* %0, i32 0, i32 0), !dbg !24, !llvm.preserve.access.index !12 + %2 = load i32, i32* %1, align 4, !dbg !24, !tbaa !25 + ret i32 %2, !dbg !28 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: nounwind readnone +declare i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s1s(%struct.s1*, i32 immarg, i32 immarg) #2 + +; Function Attrs: nounwind +define dso_local i32 @bar(%struct.s1* %arg) #0 !dbg !29 { +entry: + %arg.addr = alloca %struct.s1*, align 8 + store %struct.s1* %arg, %struct.s1** %arg.addr, align 8, !tbaa !18 + call void @llvm.dbg.declare(metadata %struct.s1** %arg.addr, metadata !31, metadata !DIExpression()), !dbg !32 + %0 = load %struct.s1*, %struct.s1** %arg.addr, align 8, !dbg !33, !tbaa !18 + %1 = call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s1s(%struct.s1* %0, i32 0, i32 0), !dbg !34, !llvm.preserve.access.index !12 + %2 = load i32, i32* %1, align 4, !dbg !34, !tbaa !25 + ret i32 %2, !dbg !35 +} + +; CHECK: .long 1 # BTF_KIND_STRUCT(id = 2) + +; CHECK: .ascii "s1" # string offset=1 +; CHECK: .ascii ".text" # string offset=20 +; CHECK: .ascii "0:0" # string offset=26 + +; CHECK: .long 16 # FieldReloc +; CHECK-NEXT: .long 20 # Field reloc section string offset=20 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 26 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 26 +; CHECK-NEXT: .long 0 + +attributes #0 = { nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable willreturn } +attributes #2 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project.git 2f40e20613758b3e11a15494c09f4b6973673d6b)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/core") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git 2f40e20613758b3e11a15494c09f4b6973673d6b)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !11} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s1", file: !1, line: 1, size: 64, elements: !13) +!13 = !{!14, !15} +!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !1, line: 1, baseType: !10, size: 32) +!15 = 
!DIDerivedType(tag: DW_TAG_member, name: "b", scope: !12, file: !1, line: 1, baseType: !10, size: 32, offset: 32)
+!16 = !{!17}
+!17 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 2, type: !11)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"any pointer", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 2, column: 20, scope: !7)
+!23 = !DILocation(line: 2, column: 34, scope: !7)
+!24 = !DILocation(line: 2, column: 39, scope: !7)
+!25 = !{!26, !27, i64 0}
+!26 = !{!"s1", !27, i64 0, !27, i64 4}
+!27 = !{!"int", !20, i64 0}
+!28 = !DILocation(line: 2, column: 27, scope: !7)
+!29 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !30)
+!30 = !{!31}
+!31 = !DILocalVariable(name: "arg", arg: 1, scope: !29, file: !1, line: 3, type: !11)
+!32 = !DILocation(line: 3, column: 20, scope: !29)
+!33 = !DILocation(line: 3, column: 34, scope: !29)
+!34 = !DILocation(line: 3, column: 39, scope: !29)
+!35 = !DILocation(line: 3, column: 27, scope: !29)

From 7fa503ef4aaddc1c31dd36d970aa6609383e1718 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Wed, 7 Oct 2020 09:17:23 +0300
Subject: [PATCH 201/321] [SROA] rewritePartition()/findCommonType(): if uses
 have conflicting type, try getTypePartition() before falling back to largest
 integral use type (PR47592)

This is another step towards transforms not introducing inttoptr and/or
ptrtoint casts that weren't there already. In this case, when load/store
uses have conflicting types, instead of falling back to the iN, we can
try to use the allocated sub-type. As discussed, this isn't the best
idea overall (we shouldn't rely on the allocated type), but it works
fine as a temporary measure.

I've measured, and @ `-O3` as of vanilla llvm test-suite + RawSpeed,
this results in +0.05% more bitcasts, -5.51% less inttoptr and -1.05%
less ptrtoint (at the end of middle-end opt pipeline)

See https://bugs.llvm.org/show_bug.cgi?id=47592

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D88788
---
 llvm/lib/Transforms/Scalar/SROA.cpp           | 22 +++++++++++++------
 llvm/test/DebugInfo/ARM/sroa-complex.ll       |  7 +++---
 llvm/test/Transforms/SROA/ppcf128-no-fold.ll  |  6 ++---
 llvm/test/Transforms/SROA/preserve-nonnull.ll | 12 +++++-----
 4 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 7b9ce8b2f472c..09e30a46c1673 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1128,9 +1128,9 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }

 /// Walk the range of a partitioning looking for a common type to cover this
 /// sequence of slices.
-static Type *findCommonType(AllocaSlices::const_iterator B,
-                            AllocaSlices::const_iterator E,
-                            uint64_t EndOffset) {
+static std::pair<Type *, IntegerType *>
+findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
+               uint64_t EndOffset) {
   Type *Ty = nullptr;
   bool TyIsCommon = true;
   IntegerType *ITy = nullptr;
@@ -1174,7 +1174,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
     Ty = UserTy;
   }

-  return TyIsCommon ? Ty : ITy;
+  return {TyIsCommon ? Ty : nullptr, ITy};
 }

 /// PHI instructions that use an alloca and are subsequently loaded can be
@@ -4264,13 +4264,21 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   // or an i8 array of an appropriate size.
   Type *SliceTy = nullptr;
   const DataLayout &DL = AI.getModule()->getDataLayout();
-  if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
-    if (DL.getTypeAllocSize(CommonUseTy).getFixedSize() >= P.size())
-      SliceTy = CommonUseTy;
+  std::pair<Type *, IntegerType *> CommonUseTy =
+      findCommonType(P.begin(), P.end(), P.endOffset());
+  // Do all uses operate on the same type?
+  if (CommonUseTy.first)
+    if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size())
+      SliceTy = CommonUseTy.first;
+  // If not, can we find an appropriate subtype in the original allocated type?
   if (!SliceTy)
     if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
                                                  P.beginOffset(), P.size()))
       SliceTy = TypePartitionTy;
+  // If still not, can we use the largest bitwidth integer type used?
+  if (!SliceTy && CommonUseTy.second)
+    if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size())
+      SliceTy = CommonUseTy.second;
   if ((!SliceTy || (SliceTy->isArrayTy() &&
                     SliceTy->getArrayElementType()->isIntegerTy())) &&
       DL.isLegalInteger(P.size() * 8))

diff --git a/llvm/test/DebugInfo/ARM/sroa-complex.ll b/llvm/test/DebugInfo/ARM/sroa-complex.ll
index 50e30ce68bbdd..9c92218845c7f 100644
--- a/llvm/test/DebugInfo/ARM/sroa-complex.ll
+++ b/llvm/test/DebugInfo/ARM/sroa-complex.ll
@@ -15,14 +15,13 @@ entry:
   %c.realp = getelementptr inbounds { double, double }, { double, double }* %c, i32 0, i32 0, !dbg !17
   %c.imagp = getelementptr inbounds { double, double }, { double, double }* %c, i32 0, i32 1, !dbg !17
   store double 0.000000e+00, double* %c.realp, align 8, !dbg !17
-  ; SROA will split the complex double into two i64 values, because there is
-  ; no native double data type available.
+  ; SROA will split the complex double into two double values.
; Test that debug info for both values survives: - ; CHECK: call void @llvm.dbg.value(metadata i64 0, + ; CHECK: call void @llvm.dbg.value(metadata double 0.000000e+00, ; CHECK-SAME: metadata ![[C:[^,]*]], ; CHECK-SAME: metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) store double 0.000000e+00, double* %c.imagp, align 8, !dbg !17 - ; CHECK: call void @llvm.dbg.value(metadata i64 0, + ; CHECK: call void @llvm.dbg.value(metadata double 0.000000e+00, ; CHECK-SAME: metadata ![[C]], ; CHECK-SAME: metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) ret void, !dbg !18 diff --git a/llvm/test/Transforms/SROA/ppcf128-no-fold.ll b/llvm/test/Transforms/SROA/ppcf128-no-fold.ll index 3f2934cbe1664..4981f6d77a7e2 100644 --- a/llvm/test/Transforms/SROA/ppcf128-no-fold.ll +++ b/llvm/test/Transforms/SROA/ppcf128-no-fold.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -sroa -S | FileCheck %s +; RUN: opt < %s -sroa -S | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -27,8 +27,8 @@ entry: ; CHECK-LABEL: @foo ; CHECK-NOT: i128 4628293042053316608 ; CHECK-NOT: i128 4653260752096854016 -; CHECK-DAG: i128 bitcast (ppc_fp128 0xM403B0000000000000000000000000000 to i128) -; CHECK-DAG: i128 bitcast (ppc_fp128 0xM4093B400000000000000000000000000 to i128) +; CHECK-DAG: bitcast ppc_fp128 0xM403B0000000000000000000000000000 to i128 +; CHECK-DAG: bitcast ppc_fp128 0xM4093B400000000000000000000000000 to i128 ; CHECK: call void @bar(i8* %v, [2 x i128] ; CHECK: ret void diff --git a/llvm/test/Transforms/SROA/preserve-nonnull.ll b/llvm/test/Transforms/SROA/preserve-nonnull.ll index 284a6154cc177..c9db06dca1dc6 100644 --- a/llvm/test/Transforms/SROA/preserve-nonnull.ll +++ b/llvm/test/Transforms/SROA/preserve-nonnull.ll @@ -51,11 +51,10 @@ entry: define i8* @propagate_nonnull_to_int() { ; CHECK-LABEL: define i8* @propagate_nonnull_to_int( ; CHECK-NEXT: entry: -; CHECK-NEXT: %[[A:.*]] = alloca i64 -; CHECK-NEXT: store i64 42, i64* %[[A]] -; CHECK-NEXT: %[[LOAD:.*]] = load volatile i64, i64* %[[A]] -; CHECK-NEXT: %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8* -; CHECK-NEXT: ret i8* %[[CAST]] +; CHECK-NEXT: %[[A:.*]] = alloca i8* +; CHECK-NEXT: store i8* inttoptr (i64 42 to i8*), i8** %[[A]] +; CHECK-NEXT: %[[LOAD:.*]] = load volatile i8*, i8** %[[A]] +; CHECK-NEXT: ret i8* %[[LOAD]] entry: %a = alloca [2 x i8*] %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0 @@ -75,8 +74,7 @@ entry: define i8* @propagate_nonnull_to_int_and_promote() { ; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote( ; CHECK-NEXT: entry: -; CHECK-NEXT: %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8* -; CHECK-NEXT: ret i8* %[[PROMOTED_VALUE]] +; CHECK-NEXT: ret i8* inttoptr (i64 42 to i8*) entry: %a = alloca [2 x i8*], align 8 %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0 From 0fcacefd160494280dc040f4f055db6df695ac12 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 6 Oct 2020 23:33:04 -0700 Subject: [PATCH 202/321] [lldb] Format unix signal table (NFC) Restore unix signal table to its original glory and mark it as not to be clang-formatted. 
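For reference, clang-format skips everything between the marker comments
used in this patch, which is what lets a hand-aligned table survive later
reformatting. A minimal illustration (hypothetical helper, not from this
patch):

  // clang-format off
  void AddRow(int signo, const char *name, bool stop); // hypothetical helper
  inline void BuildTable() {
    AddRow(1,  "SIGHUP",  true );  // manual column alignment survives
    AddRow(17, "SIGSTOP", true );  // because the formatter never touches
    AddRow(23, "SIGIO",   false);  // the region between the two markers
  }
  // clang-format on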
--- lldb/source/Target/UnixSignals.cpp | 86 ++++++++++++++---------------- 1 file changed, 40 insertions(+), 46 deletions(-) diff --git a/lldb/source/Target/UnixSignals.cpp b/lldb/source/Target/UnixSignals.cpp index dce32adbf0a36..f6b4e82a88ed9 100644 --- a/lldb/source/Target/UnixSignals.cpp +++ b/lldb/source/Target/UnixSignals.cpp @@ -65,55 +65,49 @@ UnixSignals::UnixSignals(const UnixSignals &rhs) : m_signals(rhs.m_signals) {} UnixSignals::~UnixSignals() = default; void UnixSignals::Reset() { - // This builds one standard set of Unix Signals. If yours aren't quite in + // This builds one standard set of Unix Signals. If yours aren't quite in // this order, you can either subclass this class, and use Add & Remove to - // change them - // or you can subclass and build them afresh in your constructor; + // change them or you can subclass and build them afresh in your constructor. // - // Note: the signals below are the Darwin signals. Do not change these! + // Note: the signals below are the Darwin signals. Do not change these! + m_signals.clear(); - // SIGNO NAME SUPPRESS STOP NOTIFY DESCRIPTION - // ====== ============ ======== ====== ====== - // =================================================== - AddSignal(1, "SIGHUP", false, true, true, "hangup"); - AddSignal(2, "SIGINT", true, true, true, "interrupt"); - AddSignal(3, "SIGQUIT", false, true, true, "quit"); - AddSignal(4, "SIGILL", false, true, true, "illegal instruction"); - AddSignal(5, "SIGTRAP", true, true, true, - "trace trap (not reset when caught)"); - AddSignal(6, "SIGABRT", false, true, true, "abort()"); - AddSignal(7, "SIGEMT", false, true, true, "pollable event"); - AddSignal(8, "SIGFPE", false, true, true, "floating point exception"); - AddSignal(9, "SIGKILL", false, true, true, "kill"); - AddSignal(10, "SIGBUS", false, true, true, "bus error"); - AddSignal(11, "SIGSEGV", false, true, true, "segmentation violation"); - AddSignal(12, "SIGSYS", false, true, true, "bad argument to system call"); - AddSignal(13, "SIGPIPE", false, false, false, - "write on a pipe with no one to read it"); - AddSignal(14, "SIGALRM", false, false, false, "alarm clock"); - AddSignal(15, "SIGTERM", false, true, true, - "software termination signal from kill"); - AddSignal(16, "SIGURG", false, false, false, - "urgent condition on IO channel"); - AddSignal(17, "SIGSTOP", true, true, true, - "sendable stop signal not from tty"); - AddSignal(18, "SIGTSTP", false, true, true, "stop signal from tty"); - AddSignal(19, "SIGCONT", false, true, true, "continue a stopped process"); - AddSignal(20, "SIGCHLD", false, false, false, - "to parent on child stop or exit"); - AddSignal(21, "SIGTTIN", false, true, true, - "to readers process group upon background tty read"); - AddSignal(22, "SIGTTOU", false, true, true, - "to readers process group upon background tty write"); - AddSignal(23, "SIGIO", false, false, false, "input/output possible signal"); - AddSignal(24, "SIGXCPU", false, true, true, "exceeded CPU time limit"); - AddSignal(25, "SIGXFSZ", false, true, true, "exceeded file size limit"); - AddSignal(26, "SIGVTALRM", false, false, false, "virtual time alarm"); - AddSignal(27, "SIGPROF", false, false, false, "profiling time alarm"); - AddSignal(28, "SIGWINCH", false, false, false, "window size changes"); - AddSignal(29, "SIGINFO", false, true, true, "information request"); - AddSignal(30, "SIGUSR1", false, true, true, "user defined signal 1"); - AddSignal(31, "SIGUSR2", false, true, true, "user defined signal 2"); + + // clang-format off + // SIGNO NAME 
SUPPRESS STOP NOTIFY DESCRIPTION + // ====== ============ ======== ====== ====== =================================================== + AddSignal(1, "SIGHUP", false, true, true, "hangup"); + AddSignal(2, "SIGINT", true, true, true, "interrupt"); + AddSignal(3, "SIGQUIT", false, true, true, "quit"); + AddSignal(4, "SIGILL", false, true, true, "illegal instruction"); + AddSignal(5, "SIGTRAP", true, true, true, "trace trap (not reset when caught)"); + AddSignal(6, "SIGABRT", false, true, true, "abort()"); + AddSignal(7, "SIGEMT", false, true, true, "pollable event"); + AddSignal(8, "SIGFPE", false, true, true, "floating point exception"); + AddSignal(9, "SIGKILL", false, true, true, "kill"); + AddSignal(10, "SIGBUS", false, true, true, "bus error"); + AddSignal(11, "SIGSEGV", false, true, true, "segmentation violation"); + AddSignal(12, "SIGSYS", false, true, true, "bad argument to system call"); + AddSignal(13, "SIGPIPE", false, false, false, "write on a pipe with no one to read it"); + AddSignal(14, "SIGALRM", false, false, false, "alarm clock"); + AddSignal(15, "SIGTERM", false, true, true, "software termination signal from kill"); + AddSignal(16, "SIGURG", false, false, false, "urgent condition on IO channel"); + AddSignal(17, "SIGSTOP", true, true, true, "sendable stop signal not from tty"); + AddSignal(18, "SIGTSTP", false, true, true, "stop signal from tty"); + AddSignal(19, "SIGCONT", false, true, true, "continue a stopped process"); + AddSignal(20, "SIGCHLD", false, false, false, "to parent on child stop or exit"); + AddSignal(21, "SIGTTIN", false, true, true, "to readers process group upon background tty read"); + AddSignal(22, "SIGTTOU", false, true, true, "to readers process group upon background tty write"); + AddSignal(23, "SIGIO", false, false, false, "input/output possible signal"); + AddSignal(24, "SIGXCPU", false, true, true, "exceeded CPU time limit"); + AddSignal(25, "SIGXFSZ", false, true, true, "exceeded file size limit"); + AddSignal(26, "SIGVTALRM", false, false, false, "virtual time alarm"); + AddSignal(27, "SIGPROF", false, false, false, "profiling time alarm"); + AddSignal(28, "SIGWINCH", false, false, false, "window size changes"); + AddSignal(29, "SIGINFO", false, true, true, "information request"); + AddSignal(30, "SIGUSR1", false, true, true, "user defined signal 1"); + AddSignal(31, "SIGUSR2", false, true, true, "user defined signal 2"); + // clang-format on } void UnixSignals::AddSignal(int signo, const char *name, bool default_suppress, From fba42aea438cc4c93233a10703e83f45035ffa64 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 7 Oct 2020 13:52:48 +0700 Subject: [PATCH 203/321] [NFC] Use getZero instead of getConstant(0) --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index e8b38dcdc885f..dd85fafc3a133 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1363,7 +1363,7 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); bool NonNegativeDef = SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, - SE->getConstant(NarrowSCEV->getType(), 0)); + SE->getZero(NarrowSCEV->getType())); for (User *U : NarrowDef->users()) { Instruction *NarrowUser = cast(U); From 53b3873cf428fd78f1d92504cc20adf11181ead7 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Wed, 7 Oct 
2020 09:08:41 +0200
Subject: [PATCH 204/321] [Support][unittests] Enforce alignment in
 ConvertUTFTest

`LLVM-Unit :: Support/./SupportTests/ConvertUTFTest.ConvertUTF16LittleEndianToUTF8String`
`FAIL`s on Solaris/sparcv9:

In `llvm/lib/Support/ConvertUTFWrapper.cpp` (`convertUTF16ToUTF8String`)
the `SrcBytes` arg is reinterpreted/accessed as `UTF16` (`unsigned short`,
which requires 2-byte alignment on strict-alignment targets like Sparc)
without anything guaranteeing the alignment, so the access yields a
`SIGBUS`.

This patch avoids this by enforcing the required alignment in the
callers.

Tested on `sparcv9-sun-solaris2.11`.

Differential Revision: https://reviews.llvm.org/D88824
---
 llvm/lib/Support/ConvertUTFWrapper.cpp    | 2 ++
 llvm/unittests/Support/ConvertUTFTest.cpp | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp
index 6ec567882ea6b..d8d46712a5935 100644
--- a/llvm/lib/Support/ConvertUTFWrapper.cpp
+++ b/llvm/lib/Support/ConvertUTFWrapper.cpp
@@ -97,6 +97,8 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
   const UTF16 *Src = reinterpret_cast<const UTF16 *>(SrcBytes.begin());
   const UTF16 *SrcEnd = reinterpret_cast<const UTF16 *>(SrcBytes.end());

+  assert((uintptr_t)Src % sizeof(UTF16) == 0);
+
   // Byteswap if necessary.
   std::vector<UTF16> ByteSwapped;
   if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {

diff --git a/llvm/unittests/Support/ConvertUTFTest.cpp b/llvm/unittests/Support/ConvertUTFTest.cpp
index 83019722332d3..b689e688f720e 100644
--- a/llvm/unittests/Support/ConvertUTFTest.cpp
+++ b/llvm/unittests/Support/ConvertUTFTest.cpp
@@ -16,7 +16,7 @@ using namespace llvm;

 TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
   // Src is the look of disapproval.
-  static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
+  alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
   ArrayRef<char> Ref(Src, sizeof(Src) - 1);
   std::string Result;
   bool Success = convertUTF16ToUTF8String(Ref, Result);
@@ -27,7 +27,7 @@ TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
   // Src is the look of disapproval.
-  static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
+  alignas(UTF16) static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
   ArrayRef<char> Ref(Src, sizeof(Src) - 1);
   std::string Result;
   bool Success = convertUTF16ToUTF8String(Ref, Result);

From 334ec6f807fa65e09571fa42a0c3be0eb39e7c0f Mon Sep 17 00:00:00 2001
From: Haojian Wu
Date: Wed, 7 Oct 2020 09:33:57 +0200
Subject: [PATCH 205/321] [AST][RecoveryExpr] Support dependent conditional
 operators in C for error recovery.

Suppress the spurious "typecheck_cond_expect_scalar" diagnostic.
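A minimal sketch of the user-visible effect in C, mirroring the test
added below:

  /* With -frecovery-ast, the broken call becomes a RecoveryExpr whose
     type is dependent. Before this patch the ?: type check then emitted
     a second, spurious "scalar type required" diagnostic; with it, the
     conditional simply propagates the dependent type. */
  int call(int);

  void user(int *ptr, float f) {
    (call() ? ptr : f); /* error: too few arguments to function call */
  }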
Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D84322 --- clang/lib/Sema/SemaExpr.cpp | 10 ++++++++++ clang/test/AST/ast-dump-recovery.c | 10 ++++++++++ clang/test/Sema/error-dependence.c | 8 +++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index ee41d5f5b37de..17bb82af975f9 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -8067,6 +8067,16 @@ QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, VK = VK_RValue; OK = OK_Ordinary; + if (Context.isDependenceAllowed() && + (Cond.get()->isTypeDependent() || LHS.get()->isTypeDependent() || + RHS.get()->isTypeDependent())) { + assert(!getLangOpts().CPlusPlus); + assert(Cond.get()->containsErrors() || LHS.get()->containsErrors() || + RHS.get()->containsErrors() && + "should only occur in error-recovery path."); + return Context.DependentTy; + } + // The OpenCL operator with a vector condition is sufficiently // different to merit its own checker. if ((getLangOpts().OpenCL && Cond.get()->getType()->isVectorType()) || diff --git a/clang/test/AST/ast-dump-recovery.c b/clang/test/AST/ast-dump-recovery.c index 66830e072a2ac..7b2bcf27eccee 100644 --- a/clang/test/AST/ast-dump-recovery.c +++ b/clang/test/AST/ast-dump-recovery.c @@ -71,4 +71,14 @@ void test2() { // CHECK-NEXT: | `-DeclRefExpr {{.*}} 'some_func' // CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1 some_func(), 1; + + // conditional operator (comparison is invalid) + float f; + // CHECK: ConditionalOperator {{.*}} '' contains-errors + // CHECK-NEXT: |-RecoveryExpr {{.*}} '' + // CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int *' lvalue + // CHECK-NEXT: | `-DeclRefExpr {{.*}} 'float' lvalue + // CHECK-NEXT: |-DeclRefExpr {{.*}} 'int *' lvalue + // CHECK-NEXT: `-DeclRefExpr {{.*}} 'float' lvalue + (ptr > f ? ptr : f); } diff --git a/clang/test/Sema/error-dependence.c b/clang/test/Sema/error-dependence.c index a98b021094de3..b83a79f8c4c65 100644 --- a/clang/test/Sema/error-dependence.c +++ b/clang/test/Sema/error-dependence.c @@ -1,9 +1,15 @@ // RUN: %clang_cc1 -fsyntax-only -verify -frecovery-ast -fno-recovery-ast-type %s -int call(int); // expected-note {{'call' declared here}} +int call(int); // expected-note2 {{'call' declared here}} void test1(int s) { // verify "assigning to 'int' from incompatible type ''" is // not emitted. s = call(); // expected-error {{too few arguments to function call}} } + +void test2(int* ptr, float f) { + // verify diagnostic "used type '' where arithmetic or pointer + // type is required" is not emitted. + (call() ? ptr : f); // expected-error {{too few arguments to function call}} +} From 31dc90801746e12d6ae1f967f455cf43a5bbb039 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 7 Oct 2020 09:50:43 +0200 Subject: [PATCH 206/321] [clang] Use isCompoundAssignmentOp to simplify the code, NFC. --- clang/lib/Sema/SemaOverload.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 4696ed56dc71d..43b2ad48de1fc 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -13345,14 +13345,14 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, if (Fns.empty()) { // If there are no functions to store, just build a dependent // BinaryOperator or CompoundAssignment. 
-    if (Opc <= BO_Assign || Opc > BO_OrAssign)
-      return BinaryOperator::Create(
-          Context, Args[0], Args[1], Opc, Context.DependentTy, VK_RValue,
-          OK_Ordinary, OpLoc, CurFPFeatureOverrides());
-    return CompoundAssignOperator::Create(
-        Context, Args[0], Args[1], Opc, Context.DependentTy, VK_LValue,
-        OK_Ordinary, OpLoc, CurFPFeatureOverrides(), Context.DependentTy,
-        Context.DependentTy);
+    if (BinaryOperator::isCompoundAssignmentOp(Opc))
+      return CompoundAssignOperator::Create(
+          Context, Args[0], Args[1], Opc, Context.DependentTy, VK_LValue,
+          OK_Ordinary, OpLoc, CurFPFeatureOverrides(), Context.DependentTy,
+          Context.DependentTy);
+    return BinaryOperator::Create(Context, Args[0], Args[1], Opc,
+                                  Context.DependentTy, VK_RValue, OK_Ordinary,
+                                  OpLoc, CurFPFeatureOverrides());
   }

   // FIXME: save results of ADL from here?

From f24649b77d856157c64841457dcc4f70530d607c Mon Sep 17 00:00:00 2001
From: Haojian Wu
Date: Wed, 7 Oct 2020 10:01:04 +0200
Subject: [PATCH 207/321] [clangd] Don't set the Underlying bit on targets of
 UsingDecls.

With this patch, we don't treat `using ns::X` as a first-class
declaration like `using Z = ns::Y`; a reference to X that goes through
this using-decl is considered a direct reference (without the Underlying
bit).

Fix the workarounds in https://reviews.llvm.org/D87225 and
https://reviews.llvm.org/D74054.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D88472
---
 clang-tools-extra/clangd/FindTarget.cpp       |  6 ++---
 clang-tools-extra/clangd/FindTarget.h         | 15 ++++++++----
 clang-tools-extra/clangd/XRefs.cpp            | 23 -------------------
 .../clangd/unittests/FindTargetTests.cpp      | 13 ++++-------
 .../clangd/unittests/XRefsTests.cpp           |  9 ++++----
 5 files changed, 24 insertions(+), 42 deletions(-)

diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp
index 9db814368a024..4cf62d3d1539c 100644
--- a/clang-tools-extra/clangd/FindTarget.cpp
+++ b/clang-tools-extra/clangd/FindTarget.cpp
@@ -342,8 +342,9 @@ struct TargetFinder {
       add(TND->getUnderlyingType(), Flags | Rel::Underlying);
       Flags |= Rel::Alias; // continue with the alias.
     } else if (const UsingDecl *UD = dyn_cast<UsingDecl>(D)) {
+      // no Underlying as this is a non-renaming alias.
       for (const UsingShadowDecl *S : UD->shadows())
-        add(S->getUnderlyingDecl(), Flags | Rel::Underlying);
+        add(S->getUnderlyingDecl(), Flags);
       Flags |= Rel::Alias; // continue with the alias.
     } else if (const auto *NAD = dyn_cast<NamespaceAliasDecl>(D)) {
       add(NAD->getUnderlyingDecl(), Flags | Rel::Underlying);
@@ -354,7 +355,7 @@ struct TargetFinder {
               UUVD->getQualifier()->getAsType(),
               [UUVD](ASTContext &) { return UUVD->getNameInfo().getName(); },
              ValueFilter)) {
-        add(Target, Flags | Rel::Underlying);
+        add(Target, Flags); // no Underlying as this is a non-renaming alias
       }
       Flags |= Rel::Alias; // continue with the alias
     } else if (const UsingShadowDecl *USD = dyn_cast<UsingShadowDecl>(D)) {
@@ -364,7 +365,6 @@ struct TargetFinder {
       // Shadow decls are synthetic and not themselves interesting.
      // Record the underlying decl instead, if allowed.
       D = USD->getTargetDecl();
-      Flags |= Rel::Underlying; // continue with the underlying decl.
     } else if (const auto *DG = dyn_cast<CXXDeductionGuideDecl>(D)) {
       D = DG->getDeducedTemplate();
     } else if (const ObjCImplementationDecl *IID =

diff --git a/clang-tools-extra/clangd/FindTarget.h b/clang-tools-extra/clangd/FindTarget.h
index 48ad9e6513bbb..f328ae358c065 100644
--- a/clang-tools-extra/clangd/FindTarget.h
+++ b/clang-tools-extra/clangd/FindTarget.h
@@ -102,13 +102,20 @@ enum class DeclRelation : unsigned {
   TemplatePattern,

   // Alias options apply when the declaration is an alias.
-  // e.g. namespace clang { [[StringRef]] S; }
+  // e.g. namespace client { [[X]] x; }

   /// This declaration is an alias that was referred to.
-  /// e.g. using llvm::StringRef (the UsingDecl directly referenced).
+  /// e.g. using ns::X (the UsingDecl directly referenced),
+  ///      using Z = ns::Y (the TypeAliasDecl directly referenced)
   Alias,

-  /// This is the underlying declaration for an alias, decltype etc.
-  /// e.g. class llvm::StringRef (the underlying declaration referenced).
+  /// This is the underlying declaration for a renaming-alias, decltype etc.
+  /// e.g. class ns::Y (the underlying declaration referenced).
+  ///
+  /// Note that we don't treat `using ns::X` as a first-class declaration like
+  /// `using Z = ns::Y`. Therefore reference to X that goes through this
+  /// using-decl is considered a direct reference (without the Underlying bit).
+  /// Nevertheless, we report `using ns::X` as an Alias, so that some features
+  /// like go-to-definition can still target it.
   Underlying,
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, DeclRelation);

diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 9532e1418cca7..9469ab46c9fc3 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -343,18 +343,6 @@ locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier,
       }
     }

-    // Give the underlying decl if navigation is triggered on a non-renaming
-    // alias.
-    if (llvm::isa<UsingDecl>(D) || llvm::isa<UnresolvedUsingValueDecl>(D)) {
-      // FIXME: address more complicated cases. TargetDecl(... Underlying) gives
-      // all overload candidates, we only want the targeted one if the cursor is
-      // on an using-alias usage, workround it with getDeclAtPosition.
-      llvm::for_each(
-          getDeclAtPosition(AST, CurLoc, DeclRelation::Underlying, NodeKind),
-          [&](const NamedDecl *UD) { AddResultDecl(UD); });
-      continue;
-    }
-
     // Special case: if the class name is selected, also map Objective-C
     // categories and category implementations back to their class interface.
     //
@@ -1159,17 +1147,6 @@ ReferencesResult findReferences(ParsedAST &AST, Position Pos, uint32_t Limit,
                             DeclRelation::TemplatePattern | DeclRelation::Alias;
   std::vector<const NamedDecl *> Decls =
       getDeclAtPosition(AST, *CurLoc, Relations);
-  std::vector<const NamedDecl *> NonrenamingAliasUnderlyingDecls;
-  // If the results include a *non-renaming* alias, get its
-  // underlying decls as well. (See similar logic in locateASTReferent()).
-  for (const NamedDecl *D : Decls) {
-    if (llvm::isa<UsingDecl>(D) || llvm::isa<UnresolvedUsingValueDecl>(D)) {
-      for (const NamedDecl *AD :
-           getDeclAtPosition(AST, *CurLoc, DeclRelation::Underlying))
-        NonrenamingAliasUnderlyingDecls.push_back(AD);
-    }
-  }
-  llvm::copy(NonrenamingAliasUnderlyingDecls, std::back_inserter(Decls));

   // We traverse the AST to find references in the main file.
auto MainFileRefs = findRefs(Decls, AST);
diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
index 5bfdaaf6c3434..e4f584bea01f8 100644
--- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
@@ -181,8 +181,7 @@ TEST_F(TargetDeclTest, UsingDecl) {
     int x = [[f]](42);
   )cpp";
   // f(char) is not referenced!
-  EXPECT_DECLS("DeclRefExpr", {"using foo::f", Rel::Alias},
-               {"int f(int)", Rel::Underlying});
+  EXPECT_DECLS("DeclRefExpr", {"using foo::f", Rel::Alias}, {"int f(int)"});
 
   Code = R"cpp(
     namespace foo {
@@ -192,9 +191,8 @@ TEST_F(TargetDeclTest, UsingDecl) {
     [[using foo::f]];
   )cpp";
   // All overloads are referenced.
-  EXPECT_DECLS("UsingDecl", {"using foo::f", Rel::Alias},
-               {"int f(int)", Rel::Underlying},
-               {"int f(char)", Rel::Underlying});
+  EXPECT_DECLS("UsingDecl", {"using foo::f", Rel::Alias}, {"int f(int)"},
+               {"int f(char)"});
 
   Code = R"cpp(
     struct X {
@@ -205,8 +203,7 @@ TEST_F(TargetDeclTest, UsingDecl) {
     };
     int x = Y().[[foo]]();
   )cpp";
-  EXPECT_DECLS("MemberExpr", {"using X::foo", Rel::Alias},
-               {"int foo()", Rel::Underlying});
+  EXPECT_DECLS("MemberExpr", {"using X::foo", Rel::Alias}, {"int foo()"});
 
   Code = R"cpp(
     template <typename T>
@@ -219,7 +216,7 @@ TEST_F(TargetDeclTest, UsingDecl) {
     };
   )cpp";
   EXPECT_DECLS("UnresolvedUsingValueDecl", {"using Base::waldo", Rel::Alias},
-               {"void waldo()", Rel::Underlying});
+               {"void waldo()"});
 }
 
 TEST_F(TargetDeclTest, ConstructorInitList) {
diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index 40637b5fa7582..9f77db39a3e49 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -1118,17 +1118,18 @@ TEST(LocateSymbol, Alias) {
       // decls.
       R"cpp(
       namespace ns { class [[Foo]] {}; }
-      using ns::F^oo;
+      // FIXME: don't return the using decl if it touches the cursor position.
+      using ns::[[F^oo]];
      )cpp",
 
       R"cpp(
      namespace ns { int [[x]](char); int [[x]](double); }
-     using ns::^x;
+     using ns::[[^x]];
      )cpp",
 
       R"cpp(
      namespace ns { int [[x]](char); int x(double); }
-     using ns::x;
+     using ns::[[x]];
      int y = ^x('a');
      )cpp",
 
@@ -1156,7 +1157,7 @@ TEST(LocateSymbol, Alias) {
         };
         template <typename T>
         struct Derived : Base<T> {
-          using Base<T>::w^aldo;
+          using Base<T>::[[w^aldo]];
         };
       )cpp",
   };

From 872d72eeeb7eeea05e5812967faf9801fdbe48c4 Mon Sep 17 00:00:00 2001
From: Tres Popp
Date: Wed, 7 Oct 2020 10:04:45 +0200
Subject: [PATCH 208/321] [mlir][NFC] Style cleanup in comments

---
 mlir/include/mlir/Dialect/Shape/Transforms/Passes.td      | 2 +-
 mlir/lib/Dialect/Shape/Transforms/ShapeTypeConversion.cpp | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td
index 09cc7a1a5c93c..e3b6a476a9f5f 100644
--- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td
@@ -21,7 +21,7 @@ def ShapeToShapeLowering : FunctionPass<"shape-to-shape-lowering"> {
   let constructor = "mlir::createShapeToShapeLowering()";
 }
 
-// TODO(tpopp): Generalize this to allow any type conversions desired.
+// TODO: Generalize this to allow any type conversions desired.
def ShapeTensorToMemref : FunctionPass<"shape-tensor-to-memref"> { let summary = "Replace tensors involving shape operations with memrefs"; let constructor = "mlir::createShapeTensorToMemrefPass()"; diff --git a/mlir/lib/Dialect/Shape/Transforms/ShapeTypeConversion.cpp b/mlir/lib/Dialect/Shape/Transforms/ShapeTypeConversion.cpp index 98398fbc70e64..2baa1175807c0 100644 --- a/mlir/lib/Dialect/Shape/Transforms/ShapeTypeConversion.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/ShapeTypeConversion.cpp @@ -1,5 +1,4 @@ -//=====------- ShapeTypeConversion.cpp - Shape Type Conversions ----------*- C++ -//-*-=====// +//====----- ShapeTypeConversion.cpp - Shape Type Conversions ----*- C++-*--===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -82,7 +81,7 @@ struct ShapeTensorToMemrefPass /// Populates `patterns` with the conversion patterns of tensor->memref. // -// TODO(tpopp): Change this to work generally with any type conversions. +// TODO: Change this to work generally with any type conversions. void mlir::populateShapeTypeConversionPatterns( MLIRContext *context, BufferAssignmentTypeConverter *converter, OwningRewritePatternList *patterns) { From 85a6f8fc9600164021a56126216ce3c6ef805458 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 7 Oct 2020 15:04:40 +0700 Subject: [PATCH 209/321] [Test] Add one more test where we can avoid creating trunc --- .../IndVarSimplify/widen-loop-comp.ll | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll index da2a2e5393798..19091f2f32117 100644 --- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -596,3 +596,53 @@ exit: failure: unreachable } + +define i32 @test12(i32 %start, i32* %p, i32* %q) { +; CHECK-LABEL: @test12( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[IV_NEXT]] to i64 +; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 +; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; CHECK: failure: +; CHECK-NEXT: unreachable +; +entry: + br label %loop + +loop: + %iv = phi i32 [%start, %entry], [%iv.next, %backedge] + %cond = icmp eq i32 %iv, 0 + br i1 %cond, label %exit, label %backedge + +backedge: + %iv.next = add i32 %iv, -1 + %index = zext i32 %iv.next to i64 + %store.addr = getelementptr i32, i32* %p, i64 %index + store i32 1, i32* %store.addr + %load.addr = getelementptr i32, i32* %q, i64 %index + %stop = load i32, i32* %q + 
%loop.cond = icmp eq i32 %stop, 0
+  br i1 %loop.cond, label %loop, label %failure
+
+exit:
+  ret i32 0
+
+failure:
+  unreachable
+}

From 1aa8e6a51a0eb378ae7b2f1934946fc25f298905 Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Wed, 30 Sep 2020 18:50:34 +0100
Subject: [PATCH 210/321] [SDag] SimplifyDemandedBits: simplify to FP constant
 if all bits known

We were already doing this for integer constants. This patch implements
the same thing for floating point constants.

Differential Revision: https://reviews.llvm.org/D88570
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 13 +++++-
 llvm/test/CodeGen/ARM/fcopysign.ll            |  5 +-
 llvm/test/CodeGen/X86/combine-bextr.ll        | 10 ++--
 .../X86/copysign-constant-magnitude.ll        | 12 ++---
 llvm/test/CodeGen/X86/fp-intrinsics.ll        | 19 ++++----
 llvm/test/CodeGen/X86/fp-round.ll             | 14 +++---
 .../CodeGen/X86/fp-strict-scalar-inttofp.ll   | 33 ++++++-------
 llvm/test/CodeGen/X86/fp128-cast.ll           |  3 +-
 llvm/test/CodeGen/X86/scalar-int-to-fp.ll     | 19 ++++----
 llvm/test/CodeGen/X86/uint_to_fp-2.ll         | 15 +++---
 .../CodeGen/X86/vector-shuffle-combining.ll   | 46 ++++---------------
 11 files changed, 76 insertions(+), 113 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c54f6996c007d..8f3aae3eff0c7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -917,6 +917,13 @@ bool TargetLowering::SimplifyDemandedBits(
     return false;
   }
 
+  if (Op.getOpcode() == ISD::ConstantFP) {
+    // We know all of the bits for a floating point constant!
+    Known.One = cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt();
+    Known.Zero = ~Known.One;
+    return false;
+  }
+
   // Other users may use these bits.
   EVT VT = Op.getValueType();
   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
@@ -2254,9 +2261,13 @@ bool TargetLowering::SimplifyDemandedBits(
       if (C->isOpaque())
         return false;
     }
-    // TODO: Handle float bits as well.
     if (VT.isInteger())
       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+    if (VT.isFloatingPoint())
+      return TLO.CombineTo(
+          Op,
+          TLO.DAG.getConstantFP(
+              APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
   }
 
   return false;
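As a sketch of why every bit of a floating-point constant is known, the
following fragment uses the same APFloat/APInt calls as the hunk above (the
constant 0.5f and the function name are illustrative, not from the patch):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APInt.h"

    // IEEE-754 single-precision 0.5f is 0x3F000000; bitcastToAPInt() exposes
    // exactly those bits, so One and ~One together account for all 32 bits,
    // which is what lets SimplifyDemandedBits treat the node as fully known.
    llvm::APInt knownZeroBitsOfHalf() {
      llvm::APFloat Half(0.5f);
      llvm::APInt One = Half.bitcastToAPInt(); // 0x3F000000
      llvm::APInt Zero = ~One;                 // every other bit is known zero
      return Zero;
    }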
diff --git a/llvm/test/CodeGen/ARM/fcopysign.ll b/llvm/test/CodeGen/ARM/fcopysign.ll
index 930ef1f2d20b4..b183418ca74f7 100644
--- a/llvm/test/CodeGen/ARM/fcopysign.ll
+++ b/llvm/test/CodeGen/ARM/fcopysign.ll
@@ -95,8 +95,9 @@ define float @test4() nounwind {
 ; HARD-NEXT: vcvt.f32.f64 s0, d16
 ; HARD-NEXT: vmov.i32 d17, #0x80000000
 ; HARD-NEXT: vshr.u64 d16, d16, #32
-; HARD-NEXT: vmov.f32 s2, #5.000000e-01
-; HARD-NEXT: vbit d1, d16, d17
+; HARD-NEXT: vmov.i32 d18, #0x3f000000
+; HARD-NEXT: vorr d1, d17, d17
+; HARD-NEXT: vbsl d1, d16, d18
 ; HARD-NEXT: vadd.f32 s0, s0, s2
 ; HARD-NEXT: pop {r11, pc}
 entry:
diff --git a/llvm/test/CodeGen/X86/combine-bextr.ll b/llvm/test/CodeGen/X86/combine-bextr.ll
index 2ec55109c337e..a6cf651a39929 100644
--- a/llvm/test/CodeGen/X86/combine-bextr.ll
+++ b/llvm/test/CodeGen/X86/combine-bextr.ll
@@ -39,12 +39,10 @@ define float @bextr_uitofp(i32 %x, i32 %y) {
 ; X32-NEXT: .cfi_def_cfa_offset 8
 ; X32-NEXT: movl $3855, %eax # imm = 0xF0F
 ; X32-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: movd %eax, %xmm1
-; X32-NEXT: por %xmm0, %xmm1
-; X32-NEXT: subsd %xmm0, %xmm1
-; X32-NEXT: xorps %xmm0, %xmm0
-; X32-NEXT: cvtsd2ss %xmm1, %xmm0
+; X32-NEXT: movd %eax, %xmm0
+; X32-NEXT: por {{\.LCPI.*}}, %xmm0
+; X32-NEXT: subsd {{\.LCPI.*}}, %xmm0
+; X32-NEXT: cvtsd2ss %xmm0, %xmm0
 ; X32-NEXT: movss %xmm0, (%esp)
 ; X32-NEXT: flds (%esp)
 ; X32-NEXT: popl %eax
diff --git a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
index c147c2919f838..dd7d4a84a18a0 100644
--- a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
+++ b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
@@ -25,8 +25,7 @@ define double @mag_pos0_double(double %x) nounwind {
 define double @mag_neg0_double(double %x) nounwind {
 ; CHECK-LABEL: mag_neg0_double:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %y = call double @copysign(double -0.0, double %x)
   ret double %y
@@ -42,8 +41,7 @@ define double @mag_pos1_double(double %x) nounwind {
 ; CHECK-LABEL: mag_pos1_double:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %y = call double @copysign(double 1.0, double %x)
   ret double %y
@@ -87,8 +85,7 @@ define float @mag_pos0_float(float %x) nounwind {
 define float @mag_neg0_float(float %x) nounwind {
 ; CHECK-LABEL: mag_neg0_float:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %y = call float @copysignf(float -0.0, float %x)
   ret float %y
@@ -106,8 +103,7 @@ define float @mag_pos1_float(float %x) nounwind {
 ; CHECK-LABEL: mag_pos1_float:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %y = call float @copysignf(float 1.0, float %x)
   ret float %y
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll
b/llvm/test/CodeGen/X86/fp-intrinsics.ll index 657731c231c6b..b8a974f795b2c 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -2438,11 +2438,10 @@ define double @uifdi(i32 %x) #0 { ; X86-SSE: # %bb.0: # %entry ; X86-SSE-NEXT: subl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 16 -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: orpd %xmm0, %xmm1 -; X86-SSE-NEXT: subsd %xmm0, %xmm1 -; X86-SSE-NEXT: movsd %xmm1, (%esp) +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: orpd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: subsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) ; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp @@ -2644,12 +2643,10 @@ define float @uiffi(i32 %x) #0 { ; X86-SSE: # %bb.0: # %entry ; X86-SSE-NEXT: pushl %eax ; X86-SSE-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: orpd %xmm0, %xmm1 -; X86-SSE-NEXT: subsd %xmm0, %xmm1 -; X86-SSE-NEXT: xorps %xmm0, %xmm0 -; X86-SSE-NEXT: cvtsd2ss %xmm1, %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: orpd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: subsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 ; X86-SSE-NEXT: movss %xmm0, (%esp) ; X86-SSE-NEXT: flds (%esp) ; X86-SSE-NEXT: wait diff --git a/llvm/test/CodeGen/X86/fp-round.ll b/llvm/test/CodeGen/X86/fp-round.ll index 9d40593b25744..255ad732f9d62 100644 --- a/llvm/test/CodeGen/X86/fp-round.ll +++ b/llvm/test/CodeGen/X86/fp-round.ll @@ -13,11 +13,10 @@ define float @round_f32(float %x) { ; SSE41: ## %bb.0: ; SSE41-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; SSE41-NEXT: andps %xmm0, %xmm1 -; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE41-NEXT: orps %xmm1, %xmm2 -; SSE41-NEXT: addss %xmm0, %xmm2 +; SSE41-NEXT: orps {{.*}}(%rip), %xmm1 +; SSE41-NEXT: addss %xmm0, %xmm1 ; SSE41-NEXT: xorps %xmm0, %xmm0 -; SSE41-NEXT: roundss $11, %xmm2, %xmm0 +; SSE41-NEXT: roundss $11, %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: round_f32: @@ -51,11 +50,10 @@ define double @round_f64(double %x) { ; SSE41: ## %bb.0: ; SSE41-NEXT: movapd {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] ; SSE41-NEXT: andpd %xmm0, %xmm1 -; SSE41-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; SSE41-NEXT: orpd %xmm1, %xmm2 -; SSE41-NEXT: addsd %xmm0, %xmm2 +; SSE41-NEXT: orpd {{.*}}(%rip), %xmm1 +; SSE41-NEXT: addsd %xmm0, %xmm1 ; SSE41-NEXT: xorps %xmm0, %xmm0 -; SSE41-NEXT: roundsd $11, %xmm2, %xmm0 +; SSE41-NEXT: roundsd $11, %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: round_f64: diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll index 0a50f3df2ac23..21b09e2d25079 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll @@ -487,12 +487,10 @@ define float @uitofp_i32tof32(i32 %x) #0 { ; SSE-X86: # %bb.0: ; SSE-X86-NEXT: pushl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-X86-NEXT: orpd %xmm0, %xmm1 -; SSE-X86-NEXT: subsd %xmm0, %xmm1 -; SSE-X86-NEXT: xorps %xmm0, %xmm0 -; SSE-X86-NEXT: cvtsd2ss %xmm1, %xmm0 +; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-X86-NEXT: orpd {{\.LCPI.*}}, %xmm0 +; SSE-X86-NEXT: subsd 
{{\.LCPI.*}}, %xmm0 +; SSE-X86-NEXT: cvtsd2ss %xmm0, %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) ; SSE-X86-NEXT: wait @@ -510,10 +508,9 @@ define float @uitofp_i32tof32(i32 %x) #0 { ; AVX1-X86: # %bb.0: ; AVX1-X86-NEXT: pushl %eax ; AVX1-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX1-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-X86-NEXT: vorpd %xmm0, %xmm1, %xmm1 -; AVX1-X86-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX1-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX1-X86-NEXT: vorpd {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX1-X86-NEXT: vsubsd {{\.LCPI.*}}, %xmm0, %xmm0 ; AVX1-X86-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 ; AVX1-X86-NEXT: vmovss %xmm0, (%esp) ; AVX1-X86-NEXT: flds (%esp) @@ -1166,11 +1163,10 @@ define double @uitofp_i32tof64(i32 %x) #0 { ; SSE-X86-NEXT: .cfi_def_cfa_register %ebp ; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp -; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-X86-NEXT: orpd %xmm0, %xmm1 -; SSE-X86-NEXT: subsd %xmm0, %xmm1 -; SSE-X86-NEXT: movsd %xmm1, (%esp) +; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-X86-NEXT: orpd {{\.LCPI.*}}, %xmm0 +; SSE-X86-NEXT: subsd {{\.LCPI.*}}, %xmm0 +; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp @@ -1193,10 +1189,9 @@ define double @uitofp_i32tof64(i32 %x) #0 { ; AVX1-X86-NEXT: .cfi_def_cfa_register %ebp ; AVX1-X86-NEXT: andl $-8, %esp ; AVX1-X86-NEXT: subl $8, %esp -; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX1-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-X86-NEXT: vorpd %xmm0, %xmm1, %xmm1 -; AVX1-X86-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX1-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX1-X86-NEXT: vorpd {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX1-X86-NEXT: vsubsd {{\.LCPI.*}}, %xmm0, %xmm0 ; AVX1-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX1-X86-NEXT: fldl (%esp) ; AVX1-X86-NEXT: wait diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll index 8878273ff2053..a566c85ea35d2 100644 --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -1260,8 +1260,7 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind { ; X64-SSE-NEXT: pushq %rax ; X64-SSE-NEXT: callq __trunctfdf2 ; X64-SSE-NEXT: andps {{.*}}(%rip), %xmm0 -; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X64-SSE-NEXT: orps %xmm1, %xmm0 +; X64-SSE-NEXT: orps {{.*}}(%rip), %xmm0 ; X64-SSE-NEXT: callq __extenddftf2 ; X64-SSE-NEXT: addq $8, %rsp ; X64-SSE-NEXT: .LBB26_2: # %cleanup diff --git a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll index 67545a36168db..9458c84600f77 100644 --- a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll +++ b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll @@ -32,12 +32,10 @@ define float @u32_to_f(i32 %a) nounwind { ; SSE2_32-LABEL: u32_to_f: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %eax -; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE2_32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2_32-NEXT: orpd %xmm0, %xmm1 -; SSE2_32-NEXT: subsd %xmm0, %xmm1 -; SSE2_32-NEXT: xorps %xmm0, %xmm0 -; SSE2_32-NEXT: cvtsd2ss %xmm1, %xmm0 +; SSE2_32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2_32-NEXT: orpd {{\.LCPI.*}}, %xmm0 +; SSE2_32-NEXT: subsd {{\.LCPI.*}}, %xmm0 +; SSE2_32-NEXT: cvtsd2ss %xmm0, %xmm0 ; SSE2_32-NEXT: movss %xmm0, (%esp) 
; SSE2_32-NEXT: flds (%esp) ; SSE2_32-NEXT: popl %eax @@ -148,11 +146,10 @@ define double @u32_to_d(i32 %a) nounwind { ; SSE2_32-NEXT: movl %esp, %ebp ; SSE2_32-NEXT: andl $-8, %esp ; SSE2_32-NEXT: subl $8, %esp -; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE2_32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2_32-NEXT: orpd %xmm0, %xmm1 -; SSE2_32-NEXT: subsd %xmm0, %xmm1 -; SSE2_32-NEXT: movsd %xmm1, (%esp) +; SSE2_32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2_32-NEXT: orpd {{\.LCPI.*}}, %xmm0 +; SSE2_32-NEXT: subsd {{\.LCPI.*}}, %xmm0 +; SSE2_32-NEXT: movsd %xmm0, (%esp) ; SSE2_32-NEXT: fldl (%esp) ; SSE2_32-NEXT: movl %ebp, %esp ; SSE2_32-NEXT: popl %ebp diff --git a/llvm/test/CodeGen/X86/uint_to_fp-2.ll b/llvm/test/CodeGen/X86/uint_to_fp-2.ll index f925488632f89..c9211540329b1 100644 --- a/llvm/test/CodeGen/X86/uint_to_fp-2.ll +++ b/llvm/test/CodeGen/X86/uint_to_fp-2.ll @@ -6,12 +6,10 @@ define float @test1(i32 %x) nounwind readnone { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushl %eax -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: orpd %xmm0, %xmm1 -; CHECK-NEXT: subsd %xmm0, %xmm1 -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: cvtsd2ss %xmm1, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: orpd {{\.LCPI.*}}, %xmm0 +; CHECK-NEXT: subsd {{\.LCPI.*}}, %xmm0 +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 ; CHECK-NEXT: movss %xmm0, (%esp) ; CHECK-NEXT: flds (%esp) ; CHECK-NEXT: popl %eax @@ -28,9 +26,8 @@ define float @test2(<4 x i32> %x) nounwind readnone ssp { ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: orps %xmm0, %xmm1 -; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: orps {{\.LCPI.*}}, %xmm1 +; CHECK-NEXT: subsd {{\.LCPI.*}}, %xmm1 ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cvtsd2ss %xmm1, %xmm0 ; CHECK-NEXT: movss %xmm0, (%esp) diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index 8bdd2451434e6..1393cf256028d 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -3061,42 +3061,16 @@ define <8 x i16> @shuffle_scalar_to_vector_extract(<8 x i8>* %p0, i8* %p1, i8* % } define void @PR43024() { -; SSE2-LABEL: PR43024: -; SSE2: # %bb.0: -; SSE2-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0] -; SSE2-NEXT: movaps %xmm0, (%rax) -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: addss %xmm0, %xmm1 -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: addss %xmm0, %xmm1 -; SSE2-NEXT: addss %xmm0, %xmm1 -; SSE2-NEXT: movss %xmm1, (%rax) -; SSE2-NEXT: retq -; -; SSSE3-LABEL: PR43024: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0] -; SSSE3-NEXT: movaps %xmm0, (%rax) -; SSSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSSE3-NEXT: addss %xmm0, %xmm1 -; SSSE3-NEXT: xorps %xmm0, %xmm0 -; SSSE3-NEXT: addss %xmm0, %xmm1 -; SSSE3-NEXT: addss %xmm0, %xmm1 -; SSSE3-NEXT: movss %xmm1, (%rax) -; SSSE3-NEXT: retq -; -; SSE41-LABEL: PR43024: -; SSE41: # %bb.0: -; SSE41-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0] -; SSE41-NEXT: movaps %xmm0, (%rax) -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: addss %xmm0, %xmm1 -; SSE41-NEXT: xorps %xmm0, %xmm0 -; SSE41-NEXT: addss 
%xmm0, %xmm1 -; SSE41-NEXT: addss %xmm0, %xmm1 -; SSE41-NEXT: movss %xmm1, (%rax) -; SSE41-NEXT: retq +; SSE-LABEL: PR43024: +; SSE: # %bb.0: +; SSE-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0] +; SSE-NEXT: movaps %xmm0, (%rax) +; SSE-NEXT: addss {{.*}}(%rip), %xmm0 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: addss %xmm1, %xmm0 +; SSE-NEXT: addss %xmm1, %xmm0 +; SSE-NEXT: movss %xmm0, (%rax) +; SSE-NEXT: retq ; ; AVX-LABEL: PR43024: ; AVX: # %bb.0: From 20cfd5fa3362c71d0184639decf5c2acb743e4e6 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 7 Oct 2020 11:14:55 +0100 Subject: [PATCH 211/321] [LAA] Add test for PR47751, which currently uses wrong bounds. --- .../runtime-check-pointer-element-type.ll | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll new file mode 100644 index 0000000000000..f9ec18cb6fd88 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll @@ -0,0 +1,42 @@ +; RUN: opt -loop-vectorize -force-vector-width=2 %s -S -debug 2>&1 | FileCheck %s +; RUN: opt -passes='loop-vectorize' -force-vector-width=2 %s -S -debug 2>&1 | FileCheck %s + +; REQUIRES: asserts + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; FIXME +; Test case for PR47751. Make sure the runtime check includes a required +; addition of the size of the element type (a pointer) for the end bound. + +define void @test(i64 %arg, i32 %arg1, i8** %base) { +; CHECK: LAA: Adding RT check for range: +; CHECK-NEXT: Start: ((8 * (zext i32 (-1 + %arg1) to i64)) + (8 * (1 smin %arg)) + (-8 * %arg) + %base) +; CHECK-SAME: End: ((8 * (zext i32 (-1 + %arg1) to i64)) + %base) +; CHECK-NEXT: LAA: Adding RT check for range: +; CHECK-NEXT: Start: ((8 * (1 smin %arg)) + %base) +; CHECK-SAME: End: ((8 * %arg) + %base) + +; CHECK: vector.body + +entry: + br label %loop + +loop: + %iv.1 = phi i64 [ %arg, %entry ], [ %iv.1.next, %loop ] + %iv.2 = phi i32 [ %arg1, %entry ], [ %iv.2.next, %loop ] + %iv.2.next = add nsw i32 %iv.2, -1 + %iv.2.ext = zext i32 %iv.2.next to i64 + %idx.1 = getelementptr inbounds i8*, i8** %base, i64 %iv.2.ext + %v.1 = load i8*, i8** %idx.1, align 8 + %idx.2 = getelementptr inbounds i8*, i8** %base, i64 %iv.1 + %v.2 = load i8*, i8** %idx.2, align 8 + store i8* %v.2, i8** %idx.1, align 8 + store i8* %v.1, i8** %idx.2, align 8 + %tmp11 = icmp sgt i64 %iv.1, 1 + %iv.1.next = add nsw i64 %iv.1, -1 + br i1 %tmp11, label %loop, label %exit + +exit: ; preds = %bb3 + ret void +} From 6625892d7c5db5bacbd6f89a7ea6acf7641f9705 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 7 Oct 2020 11:45:52 +0100 Subject: [PATCH 212/321] [ARM] Regenerate vldlane tests To help make the diffs in D88569 clearer --- llvm/test/CodeGen/ARM/vldlane.ll | 1153 +++++++++++++++++++++--------- 1 file changed, 796 insertions(+), 357 deletions(-) diff --git a/llvm/test/CodeGen/ARM/vldlane.ll b/llvm/test/CodeGen/ARM/vldlane.ll index f5c0f09ed4409..312337e246f54 100644 --- a/llvm/test/CodeGen/ARM/vldlane.ll +++ b/llvm/test/CodeGen/ARM/vldlane.ll @@ -1,91 +1,130 @@ -; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft -mattr=+neon | 
FileCheck %s --check-prefixes=CHECK,DEFAULT +; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft -mattr=+neon -regalloc=basic | FileCheck %s --check-prefixes=CHECK,BASIC -; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon -regalloc=basic %s -o - \ -; RUN: | FileCheck %s - -define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vld1lanei8: ;Check the (default) alignment value. -;CHECK: vld1.8 {d16[3]}, [r0] - %tmp1 = load <8 x i8>, <8 x i8>* %B - %tmp2 = load i8, i8* %A, align 8 - %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3 - ret <8 x i8> %tmp3 +define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: vld1lanei8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vld1.8 {d16[3]}, [r0] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i8>, <8 x i8>* %B + %tmp2 = load i8, i8* %A, align 8 + %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3 + ret <8 x i8> %tmp3 } -define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: vld1lanei16: ;Check the alignment value. Max for this instruction is 16 bits: -;CHECK: vld1.16 {d16[2]}, [r0:16] - %tmp1 = load <4 x i16>, <4 x i16>* %B - %tmp2 = load i16, i16* %A, align 8 - %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2 - ret <4 x i16> %tmp3 +define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind { +; CHECK-LABEL: vld1lanei16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vld1.16 {d16[2]}, [r0:16] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x i16>, <4 x i16>* %B + %tmp2 = load i16, i16* %A, align 8 + %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2 + ret <4 x i16> %tmp3 } -define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: vld1lanei32: ;Check the alignment value. Max for this instruction is 32 bits: -;CHECK: vld1.32 {d16[1]}, [r0:32] - %tmp1 = load <2 x i32>, <2 x i32>* %B - %tmp2 = load i32, i32* %A, align 8 - %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 - ret <2 x i32> %tmp3 +define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { +; CHECK-LABEL: vld1lanei32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vld1.32 {d16[1]}, [r0:32] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = load i32, i32* %A, align 8 + %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 + ret <2 x i32> %tmp3 } -define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: vld1lanei32a32: ;Check the alignment value. Legal values are none or :32. 
-;CHECK: vld1.32 {d16[1]}, [r0:32] - %tmp1 = load <2 x i32>, <2 x i32>* %B - %tmp2 = load i32, i32* %A, align 4 - %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 - ret <2 x i32> %tmp3 +define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind { +; CHECK-LABEL: vld1lanei32a32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vld1.32 {d16[1]}, [r0:32] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = load i32, i32* %A, align 4 + %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 + ret <2 x i32> %tmp3 } define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind { -;CHECK-LABEL: vld1lanef: -;CHECK: vld1.32 {d16[1]}, [r0:32] - %tmp1 = load <2 x float>, <2 x float>* %B - %tmp2 = load float, float* %A, align 4 - %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1 - ret <2 x float> %tmp3 +; CHECK-LABEL: vld1lanef: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vld1.32 {d16[1]}, [r0:32] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <2 x float>, <2 x float>* %B + %tmp2 = load float, float* %A, align 4 + %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1 + ret <2 x float> %tmp3 } define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: vld1laneQi8: -;CHECK: vld1.8 {d17[1]}, [r0] - %tmp1 = load <16 x i8>, <16 x i8>* %B - %tmp2 = load i8, i8* %A, align 8 - %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9 - ret <16 x i8> %tmp3 +; CHECK-LABEL: vld1laneQi8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.8 {d17[1]}, [r0] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <16 x i8>, <16 x i8>* %B + %tmp2 = load i8, i8* %A, align 8 + %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9 + ret <16 x i8> %tmp3 } define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vld1laneQi16: -;CHECK: vld1.16 {d17[1]}, [r0:16] - %tmp1 = load <8 x i16>, <8 x i16>* %B - %tmp2 = load i16, i16* %A, align 8 - %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5 - ret <8 x i16> %tmp3 +; CHECK-LABEL: vld1laneQi16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.16 {d17[1]}, [r0:16] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i16>, <8 x i16>* %B + %tmp2 = load i16, i16* %A, align 8 + %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5 + ret <8 x i16> %tmp3 } define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vld1laneQi32: -;CHECK: vld1.32 {d17[1]}, [r0:32] - %tmp1 = load <4 x i32>, <4 x i32>* %B - %tmp2 = load i32, i32* %A, align 8 - %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3 - ret <4 x i32> %tmp3 +; CHECK-LABEL: vld1laneQi32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.32 {d17[1]}, [r0:32] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x i32>, <4 x i32>* %B + %tmp2 = load i32, i32* %A, align 8 + %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3 + ret <4 x i32> %tmp3 } define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind { -;CHECK-LABEL: vld1laneQf: -;CHECK: vld1.32 {d16[0]}, [r0:32] - %tmp1 = load <4 x float>, <4 x float>* %B - %tmp2 = load float, float* %A - %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0 - ret <4 x float> %tmp3 +; CHECK-LABEL: vld1laneQf: +; 
CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32] +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x float>, <4 x float>* %B + %tmp2 = load float, float* %A + %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0 + ret <4 x float> %tmp3 } %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } @@ -97,123 +136,217 @@ define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind { %struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } %struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } -define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vld2lanei8: ;Check the alignment value. Max for this instruction is 16 bits: -;CHECK: vld2.8 {d16[1], d17[1]}, [r0:16] - %tmp1 = load <8 x i8>, <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) - %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 - %tmp5 = add <8 x i8> %tmp3, %tmp4 - ret <8 x i8> %tmp5 +define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: vld2lanei8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vorr d17, d16, d16 +; CHECK-NEXT: vld2.8 {d16[1], d17[1]}, [r0:16] +; CHECK-NEXT: vadd.i8 d16, d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i8>, <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 } -define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: vld2lanei16: ;Check the alignment value. 
Max for this instruction is 32 bits: -;CHECK: vld2.16 {d16[1], d17[1]}, [r0:32] - %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <4 x i16>, <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) - %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 - %tmp5 = add <4 x i16> %tmp3, %tmp4 - ret <4 x i16> %tmp5 +define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { +; CHECK-LABEL: vld2lanei16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vorr d17, d16, d16 +; CHECK-NEXT: vld2.16 {d16[1], d17[1]}, [r0:32] +; CHECK-NEXT: vadd.i16 d16, d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>, <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 } define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: vld2lanei32: -;CHECK: vld2.32 - %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>, <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 - %tmp5 = add <2 x i32> %tmp3, %tmp4 - ret <2 x i32> %tmp5 +; CHECK-LABEL: vld2lanei32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vorr d17, d16, d16 +; CHECK-NEXT: vld2.32 {d16[1], d17[1]}, [r0] +; CHECK-NEXT: vadd.i32 d16, d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 + ret <2 x i32> %tmp5 } ;Check for a post-increment updating load. define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind { -;CHECK-LABEL: vld2lanei32_update: -;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]! - %A = load i32*, i32** %ptr - %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>, <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 - %tmp5 = add <2 x i32> %tmp3, %tmp4 - %tmp6 = getelementptr i32, i32* %A, i32 2 - store i32* %tmp6, i32** %ptr - ret <2 x i32> %tmp5 +; DEFAULT-LABEL: vld2lanei32_update: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: vldr d16, [r1] +; DEFAULT-NEXT: ldr r3, [r0] +; DEFAULT-NEXT: vorr d17, d16, d16 +; DEFAULT-NEXT: vld2.32 {d16[1], d17[1]}, [r3]! 
+; DEFAULT-NEXT: vadd.i32 d16, d16, d17 +; DEFAULT-NEXT: str r3, [r0] +; DEFAULT-NEXT: vmov r2, r1, d16 +; DEFAULT-NEXT: mov r0, r2 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld2lanei32_update: +; BASIC: @ %bb.0: +; BASIC-NEXT: mov r2, r1 +; BASIC-NEXT: mov r1, r0 +; BASIC-NEXT: vldr d16, [r2] +; BASIC-NEXT: ldr r0, [r0] +; BASIC-NEXT: vorr d17, d16, d16 +; BASIC-NEXT: vld2.32 {d16[1], d17[1]}, [r0]! +; BASIC-NEXT: vadd.i32 d16, d16, d17 +; BASIC-NEXT: str r0, [r1] +; BASIC-NEXT: vmov r2, r3, d16 +; BASIC-NEXT: mov r0, r2 +; BASIC-NEXT: mov r1, r3 +; BASIC-NEXT: mov pc, lr + %A = load i32*, i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 + %tmp6 = getelementptr i32, i32* %A, i32 2 + store i32* %tmp6, i32** %ptr + ret <2 x i32> %tmp5 } define <2 x i32> @vld2lanei32_odd_update(i32** %ptr, <2 x i32>* %B) nounwind { -;CHECK-LABEL: vld2lanei32_odd_update: -;CHECK: mov [[INC:r[0-9]+]], #12 -;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}], [[INC]] - %A = load i32*, i32** %ptr - %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>, <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 - %tmp5 = add <2 x i32> %tmp3, %tmp4 - %tmp6 = getelementptr i32, i32* %A, i32 3 - store i32* %tmp6, i32** %ptr - ret <2 x i32> %tmp5 +; DEFAULT-LABEL: vld2lanei32_odd_update: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: vldr d16, [r1] +; DEFAULT-NEXT: mov r1, #12 +; DEFAULT-NEXT: ldr r3, [r0] +; DEFAULT-NEXT: vorr d17, d16, d16 +; DEFAULT-NEXT: vld2.32 {d16[1], d17[1]}, [r3], r1 +; DEFAULT-NEXT: vadd.i32 d16, d16, d17 +; DEFAULT-NEXT: str r3, [r0] +; DEFAULT-NEXT: vmov r2, r1, d16 +; DEFAULT-NEXT: mov r0, r2 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld2lanei32_odd_update: +; BASIC: @ %bb.0: +; BASIC-NEXT: mov r2, r1 +; BASIC-NEXT: mov r1, r0 +; BASIC-NEXT: vldr d16, [r2] +; BASIC-NEXT: mov r2, #12 +; BASIC-NEXT: ldr r0, [r0] +; BASIC-NEXT: vorr d17, d16, d16 +; BASIC-NEXT: vld2.32 {d16[1], d17[1]}, [r0], r2 +; BASIC-NEXT: vadd.i32 d16, d16, d17 +; BASIC-NEXT: str r0, [r1] +; BASIC-NEXT: vmov r2, r3, d16 +; BASIC-NEXT: mov r0, r2 +; BASIC-NEXT: mov r1, r3 +; BASIC-NEXT: mov pc, lr + %A = load i32*, i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 + %tmp6 = getelementptr i32, i32* %A, i32 3 + store i32* %tmp6, i32** %ptr + ret <2 x i32> %tmp5 } define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { -;CHECK-LABEL: vld2lanef: -;CHECK: vld2.32 - %tmp0 = bitcast float* %A to i8* - %tmp1 = load <2 x float>, <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0 - %tmp4 = extractvalue 
%struct.__neon_float32x2x2_t %tmp2, 1 - %tmp5 = fadd <2 x float> %tmp3, %tmp4 - ret <2 x float> %tmp5 +; CHECK-LABEL: vld2lanef: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vorr d17, d16, d16 +; CHECK-NEXT: vld2.32 {d16[1], d17[1]}, [r0] +; CHECK-NEXT: vadd.f32 d16, d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>, <2 x float>* %B + %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1 + %tmp5 = fadd <2 x float> %tmp3, %tmp4 + ret <2 x float> %tmp5 } -define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vld2laneQi16: ;Check the (default) alignment. -;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}] - %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>, <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) - %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 - %tmp5 = add <8 x i16> %tmp3, %tmp4 - ret <8 x i16> %tmp5 +define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { +; CHECK-LABEL: vld2laneQi16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vorr q9, q8, q8 +; CHECK-NEXT: vld2.16 {d17[1], d19[1]}, [r0] +; CHECK-NEXT: vadd.i16 q8, q8, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>, <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } -define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vld2laneQi32: ;Check the alignment value. 
Max for this instruction is 64 bits: -;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}:64] - %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>, <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) - %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 - %tmp5 = add <4 x i32> %tmp3, %tmp4 - ret <4 x i32> %tmp5 +define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { +; CHECK-LABEL: vld2laneQi32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vorr q9, q8, q8 +; CHECK-NEXT: vld2.32 {d17[0], d19[0]}, [r0:64] +; CHECK-NEXT: vadd.i32 q8, q8, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>, <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind { -;CHECK-LABEL: vld2laneQf: -;CHECK: vld2.32 - %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>, <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1 - %tmp5 = fadd <4 x float> %tmp3, %tmp4 - ret <4 x float> %tmp5 +; CHECK-LABEL: vld2laneQf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vorr q9, q8, q8 +; CHECK-NEXT: vld2.32 {d16[1], d18[1]}, [r0] +; CHECK-NEXT: vadd.f32 q8, q8, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>, <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1 + %tmp5 = fadd <4 x float> %tmp3, %tmp4 + ret <4 x float> %tmp5 } declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly @@ -235,120 +368,297 @@ declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32.p0i8(i8*, <4 %struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vld3lanei8: -;CHECK: vld3.8 - %tmp1 = load <8 x i8>, <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 - %tmp6 = add <8 x i8> %tmp3, %tmp4 - %tmp7 = add <8 x i8> %tmp5, %tmp6 - ret <8 x i8> %tmp7 +; DEFAULT-LABEL: vld3lanei8: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: vldr d16, [r1] +; DEFAULT-NEXT: vorr d17, d16, d16 +; DEFAULT-NEXT: vorr d18, d16, d16 +; DEFAULT-NEXT: vld3.8 {d16[1], d17[1], d18[1]}, [r0] +; DEFAULT-NEXT: vadd.i8 d20, d16, d17 +; 
DEFAULT-NEXT: vadd.i8 d16, d18, d20 +; DEFAULT-NEXT: vmov r0, r1, d16 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld3lanei8: +; BASIC: @ %bb.0: +; BASIC-NEXT: vldr d18, [r1] +; BASIC-NEXT: vorr d19, d18, d18 +; BASIC-NEXT: vorr d20, d18, d18 +; BASIC-NEXT: vld3.8 {d18[1], d19[1], d20[1]}, [r0] +; BASIC-NEXT: vadd.i8 d16, d18, d19 +; BASIC-NEXT: vadd.i8 d16, d20, d16 +; BASIC-NEXT: vmov r0, r1, d16 +; BASIC-NEXT: mov pc, lr + %tmp1 = load <8 x i8>, <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 + %tmp6 = add <8 x i8> %tmp3, %tmp4 + %tmp7 = add <8 x i8> %tmp5, %tmp6 + ret <8 x i8> %tmp7 } -define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: vld3lanei16: ;Check the (default) alignment value. VLD3 does not support alignment. -;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}] - %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <4 x i16>, <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) - %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2 - %tmp6 = add <4 x i16> %tmp3, %tmp4 - %tmp7 = add <4 x i16> %tmp5, %tmp6 - ret <4 x i16> %tmp7 +define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind { +; DEFAULT-LABEL: vld3lanei16: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: vldr d16, [r1] +; DEFAULT-NEXT: vorr d17, d16, d16 +; DEFAULT-NEXT: vorr d18, d16, d16 +; DEFAULT-NEXT: vld3.16 {d16[1], d17[1], d18[1]}, [r0] +; DEFAULT-NEXT: vadd.i16 d20, d16, d17 +; DEFAULT-NEXT: vadd.i16 d16, d18, d20 +; DEFAULT-NEXT: vmov r0, r1, d16 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld3lanei16: +; BASIC: @ %bb.0: +; BASIC-NEXT: vldr d18, [r1] +; BASIC-NEXT: vorr d19, d18, d18 +; BASIC-NEXT: vorr d20, d18, d18 +; BASIC-NEXT: vld3.16 {d18[1], d19[1], d20[1]}, [r0] +; BASIC-NEXT: vadd.i16 d16, d18, d19 +; BASIC-NEXT: vadd.i16 d16, d20, d16 +; BASIC-NEXT: vmov r0, r1, d16 +; BASIC-NEXT: mov pc, lr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>, <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2 + %tmp6 = add <4 x i16> %tmp3, %tmp4 + %tmp7 = add <4 x i16> %tmp5, %tmp6 + ret <4 x i16> %tmp7 } define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: vld3lanei32: -;CHECK: vld3.32 - %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>, <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2 - %tmp6 = add <2 x i32> %tmp3, %tmp4 - %tmp7 = add <2 x i32> %tmp5, %tmp6 - ret <2 x i32> %tmp7 +; DEFAULT-LABEL: vld3lanei32: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: 
vldr d16, [r1] +; DEFAULT-NEXT: vorr d17, d16, d16 +; DEFAULT-NEXT: vorr d18, d16, d16 +; DEFAULT-NEXT: vld3.32 {d16[1], d17[1], d18[1]}, [r0] +; DEFAULT-NEXT: vadd.i32 d20, d16, d17 +; DEFAULT-NEXT: vadd.i32 d16, d18, d20 +; DEFAULT-NEXT: vmov r0, r1, d16 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld3lanei32: +; BASIC: @ %bb.0: +; BASIC-NEXT: vldr d18, [r1] +; BASIC-NEXT: vorr d19, d18, d18 +; BASIC-NEXT: vorr d20, d18, d18 +; BASIC-NEXT: vld3.32 {d18[1], d19[1], d20[1]}, [r0] +; BASIC-NEXT: vadd.i32 d16, d18, d19 +; BASIC-NEXT: vadd.i32 d16, d20, d16 +; BASIC-NEXT: vmov r0, r1, d16 +; BASIC-NEXT: mov pc, lr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2 + %tmp6 = add <2 x i32> %tmp3, %tmp4 + %tmp7 = add <2 x i32> %tmp5, %tmp6 + ret <2 x i32> %tmp7 } define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind { -;CHECK-LABEL: vld3lanef: -;CHECK: vld3.32 - %tmp0 = bitcast float* %A to i8* - %tmp1 = load <2 x float>, <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2 - %tmp6 = fadd <2 x float> %tmp3, %tmp4 - %tmp7 = fadd <2 x float> %tmp5, %tmp6 - ret <2 x float> %tmp7 +; DEFAULT-LABEL: vld3lanef: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: vldr d16, [r1] +; DEFAULT-NEXT: vorr d17, d16, d16 +; DEFAULT-NEXT: vorr d18, d16, d16 +; DEFAULT-NEXT: vld3.32 {d16[1], d17[1], d18[1]}, [r0] +; DEFAULT-NEXT: vadd.f32 d20, d16, d17 +; DEFAULT-NEXT: vadd.f32 d16, d18, d20 +; DEFAULT-NEXT: vmov r0, r1, d16 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld3lanef: +; BASIC: @ %bb.0: +; BASIC-NEXT: vldr d18, [r1] +; BASIC-NEXT: vorr d19, d18, d18 +; BASIC-NEXT: vorr d20, d18, d18 +; BASIC-NEXT: vld3.32 {d18[1], d19[1], d20[1]}, [r0] +; BASIC-NEXT: vadd.f32 d16, d18, d19 +; BASIC-NEXT: vadd.f32 d16, d20, d16 +; BASIC-NEXT: vmov r0, r1, d16 +; BASIC-NEXT: mov pc, lr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>, <2 x float>* %B + %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2 + %tmp6 = fadd <2 x float> %tmp3, %tmp4 + %tmp7 = fadd <2 x float> %tmp5, %tmp6 + ret <2 x float> %tmp7 } -define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vld3laneQi16: ;Check the (default) alignment value. VLD3 does not support alignment. 
-;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}] - %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>, <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8) - %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 - %tmp6 = add <8 x i16> %tmp3, %tmp4 - %tmp7 = add <8 x i16> %tmp5, %tmp6 - ret <8 x i16> %tmp7 +define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { +; DEFAULT-LABEL: vld3laneQi16: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: vld1.64 {d16, d17}, [r1] +; DEFAULT-NEXT: vorr q9, q8, q8 +; DEFAULT-NEXT: vorr q10, q8, q8 +; DEFAULT-NEXT: vld3.16 {d16[1], d18[1], d20[1]}, [r0] +; DEFAULT-NEXT: vadd.i16 q12, q8, q9 +; DEFAULT-NEXT: vadd.i16 q8, q10, q12 +; DEFAULT-NEXT: vmov r0, r1, d16 +; DEFAULT-NEXT: vmov r2, r3, d17 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld3laneQi16: +; BASIC: @ %bb.0: +; BASIC-NEXT: vld1.64 {d18, d19}, [r1] +; BASIC-NEXT: vorr q10, q9, q9 +; BASIC-NEXT: vorr q11, q9, q9 +; BASIC-NEXT: vld3.16 {d18[1], d20[1], d22[1]}, [r0] +; BASIC-NEXT: vadd.i16 q8, q9, q10 +; BASIC-NEXT: vadd.i16 q8, q11, q8 +; BASIC-NEXT: vmov r0, r1, d16 +; BASIC-NEXT: vmov r2, r3, d17 +; BASIC-NEXT: mov pc, lr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>, <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 + ret <8 x i16> %tmp7 } ;Check for a post-increment updating load with register increment. 
define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind { -;CHECK-LABEL: vld3laneQi16_update: -;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+|lr}}], {{r[0-9]+}} - %A = load i16*, i16** %ptr - %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>, <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8) - %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 - %tmp6 = add <8 x i16> %tmp3, %tmp4 - %tmp7 = add <8 x i16> %tmp5, %tmp6 - %tmp8 = getelementptr i16, i16* %A, i32 %inc - store i16* %tmp8, i16** %ptr - ret <8 x i16> %tmp7 +; DEFAULT-LABEL: vld3laneQi16_update: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: .save {r11, lr} +; DEFAULT-NEXT: push {r11, lr} +; DEFAULT-NEXT: vld1.64 {d16, d17}, [r1] +; DEFAULT-NEXT: lsl r1, r2, #1 +; DEFAULT-NEXT: vorr q9, q8, q8 +; DEFAULT-NEXT: ldr lr, [r0] +; DEFAULT-NEXT: vorr q10, q8, q8 +; DEFAULT-NEXT: vld3.16 {d16[1], d18[1], d20[1]}, [lr], r1 +; DEFAULT-NEXT: vadd.i16 q12, q8, q9 +; DEFAULT-NEXT: vadd.i16 q8, q10, q12 +; DEFAULT-NEXT: str lr, [r0] +; DEFAULT-NEXT: vmov r12, r1, d16 +; DEFAULT-NEXT: vmov r2, r3, d17 +; DEFAULT-NEXT: mov r0, r12 +; DEFAULT-NEXT: pop {r11, lr} +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld3laneQi16_update: +; BASIC: @ %bb.0: +; BASIC-NEXT: .save {r11, lr} +; BASIC-NEXT: push {r11, lr} +; BASIC-NEXT: vld1.64 {d18, d19}, [r1] +; BASIC-NEXT: mov r3, r0 +; BASIC-NEXT: vorr q10, q9, q9 +; BASIC-NEXT: lsl r1, r2, #1 +; BASIC-NEXT: ldr r0, [r0] +; BASIC-NEXT: vorr q11, q9, q9 +; BASIC-NEXT: vld3.16 {d18[1], d20[1], d22[1]}, [r0], r1 +; BASIC-NEXT: vadd.i16 q8, q9, q10 +; BASIC-NEXT: vadd.i16 q8, q11, q8 +; BASIC-NEXT: str r0, [r3] +; BASIC-NEXT: vmov r1, lr, d16 +; BASIC-NEXT: vmov r2, r12, d17 +; BASIC-NEXT: mov r0, r1 +; BASIC-NEXT: mov r1, lr +; BASIC-NEXT: mov r3, r12 +; BASIC-NEXT: pop {r11, lr} +; BASIC-NEXT: mov pc, lr + %A = load i16*, i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>, <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 + %tmp8 = getelementptr i16, i16* %A, i32 %inc + store i16* %tmp8, i16** %ptr + ret <8 x i16> %tmp7 } define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vld3laneQi32: -;CHECK: vld3.32 - %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>, <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1) - %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2 - %tmp6 = add <4 x i32> %tmp3, %tmp4 - %tmp7 = add <4 x i32> %tmp5, %tmp6 - ret <4 x i32> %tmp7 +; DEFAULT-LABEL: vld3laneQi32: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: vld1.64 {d16, d17}, [r1] +; DEFAULT-NEXT: vorr q9, q8, q8 +; DEFAULT-NEXT: vorr q10, q8, q8 +; DEFAULT-NEXT: vld3.32 {d17[1], d19[1], d21[1]}, [r0] +; DEFAULT-NEXT: 
vadd.i32 q12, q8, q9 +; DEFAULT-NEXT: vadd.i32 q8, q10, q12 +; DEFAULT-NEXT: vmov r0, r1, d16 +; DEFAULT-NEXT: vmov r2, r3, d17 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld3laneQi32: +; BASIC: @ %bb.0: +; BASIC-NEXT: vld1.64 {d18, d19}, [r1] +; BASIC-NEXT: vorr q10, q9, q9 +; BASIC-NEXT: vorr q11, q9, q9 +; BASIC-NEXT: vld3.32 {d19[1], d21[1], d23[1]}, [r0] +; BASIC-NEXT: vadd.i32 q8, q9, q10 +; BASIC-NEXT: vadd.i32 q8, q11, q8 +; BASIC-NEXT: vmov r0, r1, d16 +; BASIC-NEXT: vmov r2, r3, d17 +; BASIC-NEXT: mov pc, lr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>, <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2 + %tmp6 = add <4 x i32> %tmp3, %tmp4 + %tmp7 = add <4 x i32> %tmp5, %tmp6 + ret <4 x i32> %tmp7 } define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { -;CHECK-LABEL: vld3laneQf: -;CHECK: vld3.32 - %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>, <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2 - %tmp6 = fadd <4 x float> %tmp3, %tmp4 - %tmp7 = fadd <4 x float> %tmp5, %tmp6 - ret <4 x float> %tmp7 +; DEFAULT-LABEL: vld3laneQf: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: vld1.64 {d16, d17}, [r1] +; DEFAULT-NEXT: vorr q9, q8, q8 +; DEFAULT-NEXT: vorr q10, q8, q8 +; DEFAULT-NEXT: vld3.32 {d16[1], d18[1], d20[1]}, [r0] +; DEFAULT-NEXT: vadd.f32 q12, q8, q9 +; DEFAULT-NEXT: vadd.f32 q8, q10, q12 +; DEFAULT-NEXT: vmov r0, r1, d16 +; DEFAULT-NEXT: vmov r2, r3, d17 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld3laneQf: +; BASIC: @ %bb.0: +; BASIC-NEXT: vld1.64 {d18, d19}, [r1] +; BASIC-NEXT: vorr q10, q9, q9 +; BASIC-NEXT: vorr q11, q9, q9 +; BASIC-NEXT: vld3.32 {d18[1], d20[1], d22[1]}, [r0] +; BASIC-NEXT: vadd.f32 q8, q9, q10 +; BASIC-NEXT: vadd.f32 q8, q11, q8 +; BASIC-NEXT: vmov r0, r1, d16 +; BASIC-NEXT: vmov r2, r3, d17 +; BASIC-NEXT: mov pc, lr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>, <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2 + %tmp6 = fadd <4 x float> %tmp3, %tmp4 + %tmp7 = fadd <4 x float> %tmp5, %tmp6 + ret <4 x float> %tmp7 } declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly @@ -369,141 +679,245 @@ declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32.p0i8(i8*, <4 %struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } -define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vld4lanei8: ;Check the alignment value. 
Max for this instruction is 32 bits: -;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}:32] - %tmp1 = load <8 x i8>, <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) - %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 - %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 - %tmp7 = add <8 x i8> %tmp3, %tmp4 - %tmp8 = add <8 x i8> %tmp5, %tmp6 - %tmp9 = add <8 x i8> %tmp7, %tmp8 - ret <8 x i8> %tmp9 +define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: vld4lanei8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vorr d17, d16, d16 +; CHECK-NEXT: vorr d18, d16, d16 +; CHECK-NEXT: vorr d19, d16, d16 +; CHECK-NEXT: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32] +; CHECK-NEXT: vadd.i8 d16, d16, d17 +; CHECK-NEXT: vadd.i8 d20, d18, d19 +; CHECK-NEXT: vadd.i8 d16, d16, d20 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i8>, <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 + %tmp7 = add <8 x i8> %tmp3, %tmp4 + %tmp8 = add <8 x i8> %tmp5, %tmp6 + %tmp9 = add <8 x i8> %tmp7, %tmp8 + ret <8 x i8> %tmp9 } ;Check for a post-increment updating load. define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vld4lanei8_update: -;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]! - %A = load i8*, i8** %ptr - %tmp1 = load <8 x i8>, <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) - %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 - %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 - %tmp7 = add <8 x i8> %tmp3, %tmp4 - %tmp8 = add <8 x i8> %tmp5, %tmp6 - %tmp9 = add <8 x i8> %tmp7, %tmp8 - %tmp10 = getelementptr i8, i8* %A, i32 4 - store i8* %tmp10, i8** %ptr - ret <8 x i8> %tmp9 +; DEFAULT-LABEL: vld4lanei8_update: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: vldr d16, [r1] +; DEFAULT-NEXT: vorr d17, d16, d16 +; DEFAULT-NEXT: ldr r3, [r0] +; DEFAULT-NEXT: vorr d18, d16, d16 +; DEFAULT-NEXT: vorr d19, d16, d16 +; DEFAULT-NEXT: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r3:32]! +; DEFAULT-NEXT: vadd.i8 d16, d16, d17 +; DEFAULT-NEXT: vadd.i8 d20, d18, d19 +; DEFAULT-NEXT: str r3, [r0] +; DEFAULT-NEXT: vadd.i8 d16, d16, d20 +; DEFAULT-NEXT: vmov r2, r1, d16 +; DEFAULT-NEXT: mov r0, r2 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: vld4lanei8_update: +; BASIC: @ %bb.0: +; BASIC-NEXT: vldr d16, [r1] +; BASIC-NEXT: mov r3, r0 +; BASIC-NEXT: vorr d17, d16, d16 +; BASIC-NEXT: ldr r0, [r0] +; BASIC-NEXT: vorr d18, d16, d16 +; BASIC-NEXT: vorr d19, d16, d16 +; BASIC-NEXT: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]! 
+; BASIC-NEXT: vadd.i8 d16, d16, d17 +; BASIC-NEXT: vadd.i8 d20, d18, d19 +; BASIC-NEXT: str r0, [r3] +; BASIC-NEXT: vadd.i8 d16, d16, d20 +; BASIC-NEXT: vmov r1, r2, d16 +; BASIC-NEXT: mov r0, r1 +; BASIC-NEXT: mov r1, r2 +; BASIC-NEXT: mov pc, lr + %A = load i8*, i8** %ptr + %tmp1 = load <8 x i8>, <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 + %tmp7 = add <8 x i8> %tmp3, %tmp4 + %tmp8 = add <8 x i8> %tmp5, %tmp6 + %tmp9 = add <8 x i8> %tmp7, %tmp8 + %tmp10 = getelementptr i8, i8* %A, i32 4 + store i8* %tmp10, i8** %ptr + ret <8 x i8> %tmp9 } -define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: vld4lanei16: ;Check that a power-of-two alignment smaller than the total size of the memory ;being loaded is ignored. -;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}] - %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <4 x i16>, <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4) - %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2 - %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3 - %tmp7 = add <4 x i16> %tmp3, %tmp4 - %tmp8 = add <4 x i16> %tmp5, %tmp6 - %tmp9 = add <4 x i16> %tmp7, %tmp8 - ret <4 x i16> %tmp9 +define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind { +; CHECK-LABEL: vld4lanei16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vorr d17, d16, d16 +; CHECK-NEXT: vorr d18, d16, d16 +; CHECK-NEXT: vorr d19, d16, d16 +; CHECK-NEXT: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] +; CHECK-NEXT: vadd.i16 d16, d16, d17 +; CHECK-NEXT: vadd.i16 d20, d18, d19 +; CHECK-NEXT: vadd.i16 d16, d16, d20 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>, <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4) + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3 + %tmp7 = add <4 x i16> %tmp3, %tmp4 + %tmp8 = add <4 x i16> %tmp5, %tmp6 + %tmp9 = add <4 x i16> %tmp7, %tmp8 + ret <4 x i16> %tmp9 } -define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: vld4lanei32: ;Check the alignment value. An 8-byte alignment is allowed here even though ;it is smaller than the total size of the memory being loaded. 
-;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:64] - %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>, <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8) - %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2 - %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3 - %tmp7 = add <2 x i32> %tmp3, %tmp4 - %tmp8 = add <2 x i32> %tmp5, %tmp6 - %tmp9 = add <2 x i32> %tmp7, %tmp8 - ret <2 x i32> %tmp9 +define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind { +; CHECK-LABEL: vld4lanei32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vorr d17, d16, d16 +; CHECK-NEXT: vorr d18, d16, d16 +; CHECK-NEXT: vorr d19, d16, d16 +; CHECK-NEXT: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:64] +; CHECK-NEXT: vadd.i32 d16, d16, d17 +; CHECK-NEXT: vadd.i32 d20, d18, d19 +; CHECK-NEXT: vadd.i32 d16, d16, d20 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3 + %tmp7 = add <2 x i32> %tmp3, %tmp4 + %tmp8 = add <2 x i32> %tmp5, %tmp6 + %tmp9 = add <2 x i32> %tmp7, %tmp8 + ret <2 x i32> %tmp9 } define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind { -;CHECK-LABEL: vld4lanef: -;CHECK: vld4.32 - %tmp0 = bitcast float* %A to i8* - %tmp1 = load <2 x float>, <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2 - %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3 - %tmp7 = fadd <2 x float> %tmp3, %tmp4 - %tmp8 = fadd <2 x float> %tmp5, %tmp6 - %tmp9 = fadd <2 x float> %tmp7, %tmp8 - ret <2 x float> %tmp9 +; CHECK-LABEL: vld4lanef: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vorr d17, d16, d16 +; CHECK-NEXT: vorr d18, d16, d16 +; CHECK-NEXT: vorr d19, d16, d16 +; CHECK-NEXT: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0] +; CHECK-NEXT: vadd.f32 d16, d16, d17 +; CHECK-NEXT: vadd.f32 d20, d18, d19 +; CHECK-NEXT: vadd.f32 d16, d16, d20 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>, <2 x float>* %B + %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3 + %tmp7 = fadd <2 x float> %tmp3, %tmp4 + %tmp8 = fadd <2 x float> %tmp5, %tmp6 + %tmp9 = fadd <2 x 
float> %tmp7, %tmp8 + ret <2 x float> %tmp9 } -define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vld4laneQi16: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}:64] - %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>, <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16) - %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2 - %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3 - %tmp7 = add <8 x i16> %tmp3, %tmp4 - %tmp8 = add <8 x i16> %tmp5, %tmp6 - %tmp9 = add <8 x i16> %tmp7, %tmp8 - ret <8 x i16> %tmp9 +define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind { +; CHECK-LABEL: vld4laneQi16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vorr q9, q8, q8 +; CHECK-NEXT: vorr q10, q8, q8 +; CHECK-NEXT: vorr q11, q8, q8 +; CHECK-NEXT: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0:64] +; CHECK-NEXT: vadd.i16 q8, q8, q9 +; CHECK-NEXT: vadd.i16 q12, q10, q11 +; CHECK-NEXT: vadd.i16 q8, q8, q12 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>, <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16) + %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3 + %tmp7 = add <8 x i16> %tmp3, %tmp4 + %tmp8 = add <8 x i16> %tmp5, %tmp6 + %tmp9 = add <8 x i16> %tmp7, %tmp8 + ret <8 x i16> %tmp9 } -define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vld4laneQi32: ;Check the (default) alignment. 
-;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}] - %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>, <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) - %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2 - %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3 - %tmp7 = add <4 x i32> %tmp3, %tmp4 - %tmp8 = add <4 x i32> %tmp5, %tmp6 - %tmp9 = add <4 x i32> %tmp7, %tmp8 - ret <4 x i32> %tmp9 +define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind { +; CHECK-LABEL: vld4laneQi32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vorr q9, q8, q8 +; CHECK-NEXT: vorr q10, q8, q8 +; CHECK-NEXT: vorr q11, q8, q8 +; CHECK-NEXT: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] +; CHECK-NEXT: vadd.i32 q8, q8, q9 +; CHECK-NEXT: vadd.i32 q12, q10, q11 +; CHECK-NEXT: vadd.i32 q8, q8, q12 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>, <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3 + %tmp7 = add <4 x i32> %tmp3, %tmp4 + %tmp8 = add <4 x i32> %tmp5, %tmp6 + %tmp9 = add <4 x i32> %tmp7, %tmp8 + ret <4 x i32> %tmp9 } define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind { -;CHECK-LABEL: vld4laneQf: -;CHECK: vld4.32 - %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>, <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) - %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0 - %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1 - %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2 - %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3 - %tmp7 = fadd <4 x float> %tmp3, %tmp4 - %tmp8 = fadd <4 x float> %tmp5, %tmp6 - %tmp9 = fadd <4 x float> %tmp7, %tmp8 - ret <4 x float> %tmp9 +; CHECK-LABEL: vld4laneQf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vorr q9, q8, q8 +; CHECK-NEXT: vorr q10, q8, q8 +; CHECK-NEXT: vorr q11, q8, q8 +; CHECK-NEXT: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r0] +; CHECK-NEXT: vadd.f32 q8, q8, q9 +; CHECK-NEXT: vadd.f32 q12, q10, q11 +; CHECK-NEXT: vadd.f32 q8, q8, q12 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>, <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3 + %tmp7 = fadd <4 x float> %tmp3, %tmp4 + 
%tmp8 = fadd <4 x float> %tmp5, %tmp6 + %tmp9 = fadd <4 x float> %tmp7, %tmp8 + ret <4 x float> %tmp9 } declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly @@ -520,8 +934,33 @@ declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32.p0i8(i8*, <4 ; we don't currently have a QQQQ_VFP2 super-regclass. (The "0" for the low ; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.) define <8 x i16> @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind { -;CHECK-LABEL: test_qqqq_regsequence_subreg: -;CHECK: vld3.16 +; DEFAULT-LABEL: test_qqqq_regsequence_subreg: +; DEFAULT: @ %bb.0: +; DEFAULT-NEXT: add r0, sp, #24 +; DEFAULT-NEXT: vld1.32 {d21[0]}, [r0:32] +; DEFAULT-NEXT: add r0, sp, #28 +; DEFAULT-NEXT: vmov.i32 d20, #0x0 +; DEFAULT-NEXT: vld1.32 {d21[1]}, [r0:32] +; DEFAULT-NEXT: vld3.16 {d16[1], d18[1], d20[1]}, [r0] +; DEFAULT-NEXT: vadd.i16 q12, q8, q9 +; DEFAULT-NEXT: vadd.i16 q8, q10, q12 +; DEFAULT-NEXT: vmov r0, r1, d16 +; DEFAULT-NEXT: vmov r2, r3, d17 +; DEFAULT-NEXT: mov pc, lr +; +; BASIC-LABEL: test_qqqq_regsequence_subreg: +; BASIC: @ %bb.0: +; BASIC-NEXT: add r0, sp, #24 +; BASIC-NEXT: vld1.32 {d23[0]}, [r0:32] +; BASIC-NEXT: add r0, sp, #28 +; BASIC-NEXT: vmov.i32 d22, #0x0 +; BASIC-NEXT: vld1.32 {d23[1]}, [r0:32] +; BASIC-NEXT: vld3.16 {d18[1], d20[1], d22[1]}, [r0] +; BASIC-NEXT: vadd.i16 q8, q9, q10 +; BASIC-NEXT: vadd.i16 q8, q11, q8 +; BASIC-NEXT: vmov r0, r1, d16 +; BASIC-NEXT: vmov r2, r3, d17 +; BASIC-NEXT: mov pc, lr %tmp63 = extractvalue [6 x i64] %b, 5 %tmp64 = zext i64 %tmp63 to i128 %tmp65 = shl i128 %tmp64, 64 From dce03e3059f06ca28d3c9f6fc83839933e16e0e2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 7 Oct 2020 11:46:46 +0100 Subject: [PATCH 213/321] [InstCombine] Tweak funnel by constant tests for better shl/lshr commutation coverage --- llvm/test/Transforms/InstCombine/funnel.ll | 56 +++++++++++----------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll index f8844519ee749..fca73a4ffb884 100644 --- a/llvm/test/Transforms/InstCombine/funnel.ll +++ b/llvm/test/Transforms/InstCombine/funnel.ll @@ -23,13 +23,13 @@ define i32 @fshl_i32_constant(i32 %x, i32 %y) { define i42 @fshr_i42_constant(i42 %x, i42 %y) { ; CHECK-LABEL: @fshr_i42_constant( -; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[X:%.*]], 31 -; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[Y:%.*]], 11 +; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[X:%.*]], 31 +; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[Y:%.*]], 11 ; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]] ; CHECK-NEXT: ret i42 [[R]] ; - %shl = shl i42 %x, 31 - %shr = lshr i42 %y, 11 + %shr = lshr i42 %x, 31 + %shl = shl i42 %y, 11 %r = or i42 %shr, %shl ret i42 %r } @@ -79,39 +79,39 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) { define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[Y:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]] ; CHECK-NEXT: ret <2 x i17> [[R]] ; - %shl = shl <2 x i17> %x, - %shr = lshr <2 x i17> %y, + %shr = lshr <2 x i17> %x, + %shl = shl <2 x i17> %y, %r = or <2 x i17> %shr, %shl ret <2 x i17> %r } define <2 
x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat_undef0( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[Y:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]] ; CHECK-NEXT: ret <2 x i17> [[R]] ; - %shl = shl <2 x i17> %x, - %shr = lshr <2 x i17> %y, + %shr = lshr <2 x i17> %x, + %shl = shl <2 x i17> %y, %r = or <2 x i17> %shr, %shl ret <2 x i17> %r } define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat_undef1( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[Y:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]] ; CHECK-NEXT: ret <2 x i17> [[R]] ; - %shl = shl <2 x i17> %x, - %shr = lshr <2 x i17> %y, + %shr = lshr <2 x i17> %x, + %shl = shl <2 x i17> %y, %r = or <2 x i17> %shr, %shl ret <2 x i17> %r } @@ -120,39 +120,39 @@ define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) { define <2 x i32> @fshr_v2i32_constant_nonsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %shl = shl <2 x i32> %x, - %shr = lshr <2 x i32> %y, + %shr = lshr <2 x i32> %x, + %shl = shl <2 x i32> %y, %r = or <2 x i32> %shl, %shr ret <2 x i32> %r } define <2 x i32> @fshr_v2i32_constant_nonsplat_undef0(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_undef0( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %shl = shl <2 x i32> %x, - %shr = lshr <2 x i32> %y, + %shr = lshr <2 x i32> %x, + %shl = shl <2 x i32> %y, %r = or <2 x i32> %shl, %shr ret <2 x i32> %r } define <2 x i32> @fshr_v2i32_constant_nonsplat_undef1(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_undef1( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %shl = shl <2 x i32> %x, - %shr = lshr <2 x i32> %y, + %shr = lshr <2 x i32> %x, + %shl = shl <2 x i32> %y, %r = or <2 x i32> %shl, %shr ret <2 x i32> %r } From f71f5f39f670075f7b653b1e1a6bd24e5c8f099e Mon Sep 17 00:00:00 2001 From: Rodrigo Dominguez Date: Fri, 3 Apr 2020 17:37:51 -0400 Subject: [PATCH 214/321] [AMDGPU] Implement hardware bug workaround for image instructions Summary: This implements a workaround for a hardware bug in gfx8 and gfx9, where register usage is not estimated correctly for image_store and image_gather4 instructions when D16 is used. 
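Concretely, the workaround packs the 16-bit lanes two per dword and then
pads the data operand with undef dwords out to the width the buggy
estimate expects. The following is a minimal standalone sketch of that
scheme with assumed names and types (it is not the in-tree code, which
builds the equivalent SelectionDAG/GlobalISel nodes in handleD16VData):

  // Sketch only; names and types are assumptions, not LLVM API.
  #include <cstddef>
  #include <cstdint>
  #include <optional>
  #include <vector>

  using Word = std::optional<uint32_t>; // nullopt models an undef dword

  std::vector<Word> packAndPadD16(const std::vector<uint16_t> &Lanes) {
    std::vector<Word> Words;
    for (std::size_t I = 0; I < Lanes.size(); I += 2) {
      uint32_t Lo = Lanes[I];
      uint32_t Hi = I + 1 < Lanes.size() ? Lanes[I + 1] : 0;
      Words.push_back(Lo | (Hi << 16)); // two d16 lanes per dword
    }
    // Pad so the operand is as wide as a non-d16 instruction with the
    // same dmask, which is what the hardware's estimate assumes.
    Words.resize(Words.size() * 2, std::nullopt);
    return Words;
  }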
Change-Id: I4e30744da6796acac53a9b5ad37ac1c2035c8899 Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D81172 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 34 ++- .../AMDGPU/AMDGPUInstructionSelector.cpp | 24 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 56 +++- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 2 +- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2 + llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 10 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 55 +++- llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 +- ...legalize-llvm.amdgcn.image.store.2d.d16.ll | 246 +++++++++++++----- .../llvm.amdgcn.image.store.2d.d16.ll | 53 +++- .../AMDGPU/llvm.amdgcn.image.d16.dim.ll | 9 +- .../llvm.amdgcn.image.gather4.d16.dim.ll | 6 +- 12 files changed, 377 insertions(+), 123 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6624ff00ecf64..1cd19a1d6484f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -234,6 +234,18 @@ def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug", "Branch offset of 3f hardware bug" >; +def FeatureImageStoreD16Bug : SubtargetFeature<"image-store-d16-bug", + "HasImageStoreD16Bug", + "true", + "Image Store D16 hardware bug" +>; + +def FeatureImageGather4D16Bug : SubtargetFeature<"image-gather4-d16-bug", + "HasImageGather4D16Bug", + "true", + "Image Gather4 D16 hardware bug" +>; + class SubtargetFeatureLDSBankCount : SubtargetFeature < "ldsbankcount"#Value, "LDSBankCount", @@ -810,7 +822,9 @@ def FeatureISAVersion8_1_0 : FeatureSet< [FeatureVolcanicIslands, FeatureLDSBankCount16, FeatureXNACK, - FeatureCodeObjectV3]>; + FeatureCodeObjectV3, + FeatureImageStoreD16Bug, + FeatureImageGather4D16Bug]>; def FeatureISAVersion9_0_0 : FeatureSet< [FeatureGFX9, @@ -818,7 +832,8 @@ def FeatureISAVersion9_0_0 : FeatureSet< FeatureLDSBankCount32, FeatureCodeObjectV3, FeatureDoesNotSupportXNACK, - FeatureDoesNotSupportSRAMECC]>; + FeatureDoesNotSupportSRAMECC, + FeatureImageGather4D16Bug]>; def FeatureISAVersion9_0_2 : FeatureSet< [FeatureGFX9, @@ -826,7 +841,8 @@ def FeatureISAVersion9_0_2 : FeatureSet< FeatureLDSBankCount32, FeatureXNACK, FeatureDoesNotSupportSRAMECC, - FeatureCodeObjectV3]>; + FeatureCodeObjectV3, + FeatureImageGather4D16Bug]>; def FeatureISAVersion9_0_4 : FeatureSet< [FeatureGFX9, @@ -834,7 +850,8 @@ def FeatureISAVersion9_0_4 : FeatureSet< FeatureFmaMixInsts, FeatureDoesNotSupportXNACK, FeatureDoesNotSupportSRAMECC, - FeatureCodeObjectV3]>; + FeatureCodeObjectV3, + FeatureImageGather4D16Bug]>; def FeatureISAVersion9_0_6 : FeatureSet< [FeatureGFX9, @@ -845,7 +862,8 @@ def FeatureISAVersion9_0_6 : FeatureSet< FeatureDot1Insts, FeatureDot2Insts, FeatureDoesNotSupportXNACK, - FeatureCodeObjectV3]>; + FeatureCodeObjectV3, + FeatureImageGather4D16Bug]>; def FeatureISAVersion9_0_8 : FeatureSet< [FeatureGFX9, @@ -864,14 +882,16 @@ def FeatureISAVersion9_0_8 : FeatureSet< FeatureAtomicFaddInsts, FeatureSRAMECC, FeatureMFMAInlineLiteralBug, - FeatureCodeObjectV3]>; + FeatureCodeObjectV3, + FeatureImageGather4D16Bug]>; def FeatureISAVersion9_0_9 : FeatureSet< [FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, FeatureXNACK, - FeatureCodeObjectV3]>; + FeatureCodeObjectV3, + FeatureImageGather4D16Bug]>; // TODO: Organize more features into groups. 
def FeatureGroup { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 341e28b760af4..7d7e7dc5c86e7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1539,6 +1539,16 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm(); DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask); + // One memoperand is mandatory, except for getresinfo. + // FIXME: Check this in verifier. + if (!MI.memoperands_empty()) { + const MachineMemOperand *MMO = *MI.memoperands_begin(); + + // Infer d16 from the memory size, as the register type will be mangled by + // unpacked subtargets, or by TFE. + IsD16 = ((8 * MMO->getSize()) / DMaskLanes) < 32; + } + if (BaseOpcode->Store) { VDataIn = MI.getOperand(1).getReg(); VDataTy = MRI->getType(VDataIn); @@ -1548,18 +1558,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( VDataTy = MRI->getType(VDataOut); NumVDataDwords = DMaskLanes; - // One memoperand is mandatory, except for getresinfo. - // FIXME: Check this in verifier. - if (!MI.memoperands_empty()) { - const MachineMemOperand *MMO = *MI.memoperands_begin(); - - // Infer d16 from the memory size, as the register type will be mangled by - // unpacked subtargets, or by TFE. - IsD16 = ((8 * MMO->getSize()) / DMaskLanes) < 32; - - if (IsD16 && !STI.hasUnpackedD16VMem()) - NumVDataDwords = (DMaskLanes + 1) / 2; - } + if (IsD16 && !STI.hasUnpackedD16VMem()) + NumVDataDwords = (DMaskLanes + 1) / 2; } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index f141ca196a9ad..9b6a7d096273a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3528,24 +3528,58 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B, /// Handle register layout difference for f16 images for some subtargets. 
Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, - Register Reg) const { - if (!ST.hasUnpackedD16VMem()) - return Reg; - + Register Reg, + bool ImageStore) const { const LLT S16 = LLT::scalar(16); const LLT S32 = LLT::scalar(32); LLT StoreVT = MRI.getType(Reg); assert(StoreVT.isVector() && StoreVT.getElementType() == S16); - auto Unmerge = B.buildUnmerge(S16, Reg); + if (ST.hasUnpackedD16VMem()) { + auto Unmerge = B.buildUnmerge(S16, Reg); + + SmallVector WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0)); + + int NumElts = StoreVT.getNumElements(); + + return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0); + } + + if (ImageStore && ST.hasImageStoreD16Bug()) { + if (StoreVT.getNumElements() == 2) { + SmallVector PackedRegs; + Reg = B.buildBitcast(S32, Reg).getReg(0); + PackedRegs.push_back(Reg); + PackedRegs.resize(2, B.buildUndef(S32).getReg(0)); + return B.buildBuildVector(LLT::vector(2, S32), PackedRegs).getReg(0); + } + + if (StoreVT.getNumElements() == 3) { + SmallVector PackedRegs; + auto Unmerge = B.buildUnmerge(S16, Reg); + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + PackedRegs.push_back(Unmerge.getReg(I)); + PackedRegs.resize(8, B.buildUndef(S16).getReg(0)); + Reg = B.buildBuildVector(LLT::vector(8, S16), PackedRegs).getReg(0); + return B.buildBitcast(LLT::vector(4, S32), Reg).getReg(0); + } - SmallVector WideRegs; - for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) - WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0)); + if (StoreVT.getNumElements() == 4) { + SmallVector PackedRegs; + Reg = B.buildBitcast(LLT::vector(2, S32), Reg).getReg(0); + auto Unmerge = B.buildUnmerge(S32, Reg); + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + PackedRegs.push_back(Unmerge.getReg(I)); + PackedRegs.resize(4, B.buildUndef(S32).getReg(0)); + return B.buildBuildVector(LLT::vector(4, S32), PackedRegs).getReg(0); + } - int NumElts = StoreVT.getNumElements(); + llvm_unreachable("invalid data type"); + } - return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0); + return Reg; } Register AMDGPULegalizerInfo::fixStoreSourceType( @@ -4215,7 +4249,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( if (!Ty.isVector() || Ty.getElementType() != S16) return true; - Register RepackedReg = handleD16VData(B, *MRI, VData); + Register RepackedReg = handleD16VData(B, *MRI, VData, true); if (RepackedReg != VData) { MI.getOperand(1).setReg(RepackedReg); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index e5f86511cd7c8..800fead086d0e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -146,7 +146,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const; Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, - Register Reg) const; + Register Reg, bool ImageStore = false) const; bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, bool IsFormat) const; bool legalizeRawBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 071354673ba21..0cbe7dfd30edc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -271,6 +271,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasNSAtoVMEMBug(false), HasOffset3fBug(false), HasFlatSegmentOffsetBug(false), + HasImageStoreD16Bug(false), + HasImageGather4D16Bug(false), FeatureDisable(false), InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 52d1f18513e18..a043873fb968c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -411,6 +411,8 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo, bool HasNSAtoVMEMBug; bool HasOffset3fBug; bool HasFlatSegmentOffsetBug; + bool HasImageStoreD16Bug; + bool HasImageGather4D16Bug; // Dummy feature to use for assembler in tablegen. bool FeatureDisable; @@ -1025,9 +1027,11 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo, return HasOffset3fBug; } - bool hasNSAEncoding() const { - return HasNSAEncoding; - } + bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } + + bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } + + bool hasNSAEncoding() const { return HasNSAEncoding; } bool hasGFX10_BEncoding() const { return GFX10_BEncoding; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1725c56e0db32..f1a5d8d66c4bd 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5851,7 +5851,7 @@ static SDValue constructRetValue(SelectionDAG &DAG, SDValue Data(Result, 0); SDValue TexFail; - if (IsTexFail) { + if (DMaskPop > 0 && Data.getValueType() != MaskPopVT) { SDValue ZeroIdx = DAG.getConstant(0, DL, MVT::i32); if (MaskPopVT.isVector()) { Data = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskPopVT, @@ -5860,10 +5860,6 @@ static SDValue constructRetValue(SelectionDAG &DAG, Data = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskPopVT, SDValue(Result, 0), ZeroIdx); } - - TexFail = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, - SDValue(Result, 0), - DAG.getConstant(MaskPopDwords, DL, MVT::i32)); } if (DataDwordVT.isVector()) @@ -5887,8 +5883,13 @@ static SDValue constructRetValue(SelectionDAG &DAG, } Data = DAG.getNode(ISD::BITCAST, DL, LegalReqRetVT, Data); - if (TexFail) + if (IsTexFail) { + TexFail = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, SDValue(Result, 0), + DAG.getConstant(MaskPopDwords, DL, MVT::i32)); + return DAG.getMergeValues({Data, TexFail, SDValue(Result, 1)}, DL); + } if (Result->getNumValues() == 1) return Data; @@ -6007,7 +6008,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, return Op; // D16 is unsupported for this instruction IsD16 = true; - VData = handleD16VData(VData, DAG); + VData = handleD16VData(VData, DAG, true); } NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32; @@ -6027,7 +6028,11 @@ SDValue SITargetLowering::lowerImage(SDValue Op, (!LoadVT.isVector() && DMaskLanes > 1)) return Op; - if (IsD16 && !Subtarget->hasUnpackedD16VMem()) + // The sq block of gfx8 and gfx9 do not estimate register use correctly + // for d16 image_gather4, image_gather4_l, and image_gather4_lz + // instructions. 
+ if (IsD16 && !Subtarget->hasUnpackedD16VMem() && + !(BaseOpcode->Gather4 && Subtarget->hasImageGather4D16Bug())) NumVDataDwords = (DMaskLanes + 1) / 2; else NumVDataDwords = DMaskLanes; @@ -7401,8 +7406,8 @@ SDValue SITargetLowering::getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, return NewOp; } -SDValue SITargetLowering::handleD16VData(SDValue VData, - SelectionDAG &DAG) const { +SDValue SITargetLowering::handleD16VData(SDValue VData, SelectionDAG &DAG, + bool ImageStore) const { EVT StoreVT = VData.getValueType(); // No change for f16 and legal vector D16 types. @@ -7434,6 +7439,36 @@ SDValue SITargetLowering::handleD16VData(SDValue VData, return DAG.getNode(ISD::BITCAST, DL, WidenedStoreVT, ZExt); } + // The sq block of gfx8.1 does not estimate register use correctly for d16 + // image store instructions. The data operand is computed as if it were not a + // d16 image instruction. + if (ImageStore && Subtarget->hasImageStoreD16Bug()) { + // Bitcast to i16 + EVT IntStoreVT = StoreVT.changeTypeToInteger(); + SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData); + + // Decompose into scalars + SmallVector Elts; + DAG.ExtractVectorElements(IntVData, Elts); + + // Group pairs of i16 into v2i16 and bitcast to i32 + SmallVector PackedElts; + for (unsigned I = 0; I < Elts.size() / 2; I += 1) { + SDValue Pair = + DAG.getBuildVector(MVT::v2i16, DL, {Elts[I * 2], Elts[I * 2 + 1]}); + SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair); + PackedElts.push_back(IntPair); + } + + // Pad using UNDEF + PackedElts.resize(PackedElts.size() * 2, DAG.getUNDEF(MVT::i32)); + + // Build final vector + EVT VecVT = + EVT::getVectorVT(*DAG.getContext(), MVT::i32, PackedElts.size()); + return DAG.getBuildVector(VecVT, DL, PackedElts); + } + assert(isTypeLegal(StoreVT)); return VData; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 9aa307f7bc594..a62f19c017470 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -108,7 +108,8 @@ class SITargetLowering final : public AMDGPUTargetLowering { ArrayRef Ops, EVT MemVT, MachineMemOperand *MMO, SelectionDAG &DAG) const; - SDValue handleD16VData(SDValue VData, SelectionDAG &DAG) const; + SDValue handleD16VData(SDValue VData, SelectionDAG &DAG, + bool ImageStore = false) const; /// Converts \p Op, which must be of floating point type, to the /// floating point type \p VT, by either extending or truncating it. 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll index ec9766d2434fb..b418fb6a2d9f3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -1,8 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX81 %s define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) { + ; PACKED-LABEL: name: image_store_f16 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; PACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8") + ; PACKED: S_ENDPGM 0 ; UNPACKED-LABEL: name: image_store_f16 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 @@ -22,7 +41,31 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8") ; UNPACKED: S_ENDPGM 0 - ; PACKED-LABEL: name: image_store_f16 + ; GFX81-LABEL: name: image_store_f16 + ; GFX81: bb.1 (%ir-block.0): + ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX81: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8") + ; GFX81: S_ENDPGM 0 + call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x half> %in) { + ; PACKED-LABEL: name: image_store_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 @@ -35,17 +78,11 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; PACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; PACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8") + ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8") ; PACKED: S_ENDPGM 0 - call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x half> %in) { ; UNPACKED-LABEL: name: image_store_v2f16 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 @@ -70,9 +107,35 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY11]](s32), [[COPY12]](s32) ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8") ; UNPACKED: S_ENDPGM 0 - ; PACKED-LABEL: name: image_store_v2f16 + ; GFX81-LABEL: name: image_store_v2f16 + ; GFX81: bb.1 (%ir-block.0): + ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, 
$sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX81: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX81: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32) + ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8") + ; GFX81: S_ENDPGM 0 + call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) { + ; PACKED-LABEL: name: image_store_v3f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 @@ -84,15 +147,34 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; PACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) + ; PACKED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], 
[[SHL]] + ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]] + ; PACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>) + ; PACKED: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8") + ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8) ; PACKED: S_ENDPGM 0 - call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) { ; UNPACKED-LABEL: name: image_store_v3f16 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -123,7 +205,58 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32) ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8) ; UNPACKED: S_ENDPGM 0 - ; PACKED-LABEL: name: image_store_v3f16 + ; GFX81-LABEL: name: image_store_v3f16 + ; GFX81: bb.1 (%ir-block.0): + ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX81: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX81: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX81: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX81: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX81: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX81: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX81: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX81: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GFX81: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; GFX81: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX81: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; GFX81: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX81: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX81: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX81: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; GFX81: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]] + ; GFX81: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX81: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; GFX81: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX81: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX81: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; GFX81: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX81: [[COPY15:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; GFX81: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX81: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; GFX81: [[BITCAST5:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<8 x s16>) + ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST5]](<4 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8) + ; GFX81: S_ENDPGM 0 + call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) { + ; PACKED-LABEL: name: image_store_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 @@ -139,37 +272,10 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; PACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; PACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: 
[[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] - ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]] - ; PACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>) - ; PACKED: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8) + ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8") ; PACKED: S_ENDPGM 0 - call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) { ; UNPACKED-LABEL: name: image_store_v4f16 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -199,26 +305,30 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8") ; UNPACKED: S_ENDPGM 0 - ; PACKED-LABEL: name: image_store_v4f16 - ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; PACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; PACKED: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8") - ; PACKED: S_ENDPGM 0 + ; GFX81-LABEL: name: image_store_v4f16 + ; GFX81: bb.1 (%ir-block.0): + ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX81: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) + ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; GFX81: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX81: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX81: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) + ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8") + ; GFX81: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll index d04469bf3e2e7..06006bd830f26 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -o - %s | FileCheck -check-prefix=UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -o - %s | FileCheck -check-prefix=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -o - %s | FileCheck -check-prefix=GFX81 %s define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) { ; UNPACKED-LABEL: image_store_f16: @@ -13,7 +13,7 @@ define 
amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; UNPACKED-NEXT: s_mov_b32 s5, s7 ; UNPACKED-NEXT: s_mov_b32 s6, s8 ; UNPACKED-NEXT: s_mov_b32 s7, s9 -; UNPACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm +; UNPACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16 ; UNPACKED-NEXT: s_endpgm ; ; PACKED-LABEL: image_store_f16: @@ -26,8 +26,21 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; PACKED-NEXT: s_mov_b32 s5, s7 ; PACKED-NEXT: s_mov_b32 s6, s8 ; PACKED-NEXT: s_mov_b32 s7, s9 -; PACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm +; PACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16 ; PACKED-NEXT: s_endpgm +; +; GFX81-LABEL: image_store_f16: +; GFX81: ; %bb.0: +; GFX81-NEXT: s_mov_b32 s0, s2 +; GFX81-NEXT: s_mov_b32 s1, s3 +; GFX81-NEXT: s_mov_b32 s2, s4 +; GFX81-NEXT: s_mov_b32 s3, s5 +; GFX81-NEXT: s_mov_b32 s4, s6 +; GFX81-NEXT: s_mov_b32 s5, s7 +; GFX81-NEXT: s_mov_b32 s6, s8 +; GFX81-NEXT: s_mov_b32 s7, s9 +; GFX81-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16 +; GFX81-NEXT: s_endpgm call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -44,7 +57,7 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED-NEXT: s_mov_b32 s6, s8 ; UNPACKED-NEXT: s_mov_b32 s7, s9 ; UNPACKED-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; UNPACKED-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm +; UNPACKED-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16 ; UNPACKED-NEXT: s_endpgm ; ; PACKED-LABEL: image_store_v2f16: @@ -57,8 +70,21 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; PACKED-NEXT: s_mov_b32 s5, s7 ; PACKED-NEXT: s_mov_b32 s6, s8 ; PACKED-NEXT: s_mov_b32 s7, s9 -; PACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm +; PACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16 ; PACKED-NEXT: s_endpgm +; +; GFX81-LABEL: image_store_v2f16: +; GFX81: ; %bb.0: +; GFX81-NEXT: s_mov_b32 s0, s2 +; GFX81-NEXT: s_mov_b32 s1, s3 +; GFX81-NEXT: s_mov_b32 s2, s4 +; GFX81-NEXT: s_mov_b32 s3, s5 +; GFX81-NEXT: s_mov_b32 s4, s6 +; GFX81-NEXT: s_mov_b32 s5, s7 +; GFX81-NEXT: s_mov_b32 s6, s8 +; GFX81-NEXT: s_mov_b32 s7, s9 +; GFX81-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16 +; GFX81-NEXT: s_endpgm call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -85,7 +111,7 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED-NEXT: v_mov_b32_e32 v5, v0 ; UNPACKED-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; UNPACKED-NEXT: v_lshrrev_b32_e32 v4, 16, v3 -; UNPACKED-NEXT: image_store v[1:4], v[5:6], s[0:7] dmask:0xf unorm +; UNPACKED-NEXT: image_store v[1:4], v[5:6], s[0:7] dmask:0xf unorm d16 ; UNPACKED-NEXT: s_endpgm ; ; PACKED-LABEL: image_store_v4f16: @@ -98,8 +124,21 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; PACKED-NEXT: s_mov_b32 s5, s7 ; PACKED-NEXT: s_mov_b32 s6, s8 ; PACKED-NEXT: s_mov_b32 s7, s9 -; PACKED-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm +; PACKED-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16 ; PACKED-NEXT: s_endpgm +; +; GFX81-LABEL: image_store_v4f16: +; GFX81: ; %bb.0: +; GFX81-NEXT: s_mov_b32 s0, s2 +; GFX81-NEXT: s_mov_b32 s1, s3 +; GFX81-NEXT: s_mov_b32 s2, s4 +; GFX81-NEXT: s_mov_b32 s3, s5 +; GFX81-NEXT: s_mov_b32 s4, 
s6 +; GFX81-NEXT: s_mov_b32 s5, s7 +; GFX81-NEXT: s_mov_b32 s6, s8 +; GFX81-NEXT: s_mov_b32 s7, s9 +; GFX81-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16 +; GFX81-NEXT: s_endpgm call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll index da1174d7eb860..06607e2a518e3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX81,GFX89 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX81,GFX89 %s ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX9,GFX89 %s ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s @@ -15,6 +15,7 @@ main_body: ; GCN-LABEL: {{^}}image_load_v2f16: ; UNPACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}} ; PACKED: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}} +; GFX81: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}} ; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps float @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { main_body: @@ -38,6 +39,7 @@ main_body: ; GCN-LABEL: {{^}}image_load_v4f16: ; UNPACKED: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} ; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} +; GFX81: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} ; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps <2 x float> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { main_body: @@ -49,6 +51,7 @@ main_body: ; GCN-LABEL: {{^}}image_load_mip_v4f16: ; UNPACKED: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm d16{{$}} ; PACKED: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf unorm d16{{$}} +; GFX81: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf unorm d16{{$}} ; GFX10: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps <2 x float> @image_load_mip_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) { main_body: @@ -60,6 +63,7 @@ main_body: ; GCN-LABEL: {{^}}image_load_3d_v2f16: ; UNPACKED: image_load v[0:1], v[0:2], s[0:7] dmask:0x3 unorm d16{{$}} ; PACKED: image_load v0, v[0:2], s[0:7] dmask:0x3 unorm d16{{$}} +; GFX81: image_load v0, v[0:2], s[0:7] dmask:0x3 unorm d16{{$}} ; GFX10: image_load v0, v[0:2], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm d16{{$}} define amdgpu_ps float @image_load_3d_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) { main_body: @@ -90,6 +94,7 @@ main_body: ; UNPACKED: v_and_b32_e32 ; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}} ; PACKED: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}} +; GFX81: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}} ; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, float %in) { main_body: @@ -113,6 +118,7 @@ main_body: ; UNPACKED: 
v_and_b32_e32
 ; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
 ; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+; GFX81: image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
 ; GFX10: image_store v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
 define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
 main_body:
@@ -128,6 +134,7 @@ main_body:
 ; UNPACKED: v_and_b32_e32
 ; UNPACKED: image_store_mip v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
 ; PACKED: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+; GFX81: image_store_mip v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
 ; GFX10: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16{{$}}
 define amdgpu_ps void @image_store_mip_1d_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %mip, <2 x float> %in) {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
index 2e4c38bae5335..e17b4c80bc800 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
@@ -1,11 +1,13 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX81,GFX89 %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX9,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX81,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9,GFX89 %s
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s

 ; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16:
 ; UNPACKED: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x4 d16{{$}}
 ; PACKED: image_gather4_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0x4 d16{{$}}
+; GFX81: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x4 d16{{$}}
+; GFX9: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x4 d16{{$}}
 ; GFX10: image_gather4_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D d16{{$}}
 define amdgpu_ps <2 x float> @image_gather4_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
 main_body:

From fed0f890e5698a7a408acaf0aa23319e918f6a2a Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Wed, 7 Oct 2020 13:52:25 +0300
Subject: [PATCH 215/321] InstCombine: Negator: don't rely on complexity sorting already being performed (PR47752)

In some cases, we can negate an instruction if only one of its operands
negates. Previously, we assumed that constants would have been
canonicalized to RHS already, but that isn't guaranteed to happen,
because of the InstCombine worklist visitation order, as the added test
(previously-hanging) shows.

So if we only need to negate a single operand, we should ensure that we
try the constant operand first. Do that by re-doing the complexity
sorting ourselves when we actually care about it.
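For illustration only (not part of the patch itself), the operand-sorting
idea can be sketched as a small standalone C++ helper. The names below are
hypothetical, not LLVM API; what mirrors the patch is only the idea of
ranking a commutative operation's operands so that a constant always lands
second.

    #include <array>
    #include <utility>

    // Sketch: order a commutative binop's operands by a complexity ranking
    // so that the lowest-ranked operand (e.g. a constant) always ends up in
    // ops[1], whether or not the expression was canonicalized elsewhere.
    template <typename Value, typename Complexity>
    std::array<Value, 2> sortedOperands(Value lhs, Value rhs, Complexity rank) {
      std::array<Value, 2> ops{lhs, rhs};
      if (rank(ops[0]) < rank(ops[1]))
        std::swap(ops[0], ops[1]);
      return ops;
    }

With such a helper, code that only wants to try negating a single operand
can always probe ops[1] first and will see the constant when there is one.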
Fixes https://bugs.llvm.org/show_bug.cgi?id=47752
---
 .../InstCombine/InstCombineInternal.h | 2 +
 .../InstCombine/InstCombineNegator.cpp | 46 +++++++++++++------
 .../InstCombine/sub-of-negatible.ll | 24 ++++++++++
 3 files changed, 59 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index eef56c8645f83..10b67695d1213 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -762,6 +762,8 @@ class Negator final {
   using Result = std::pair<SmallVector<Instruction *, 2> /*NewInstructions*/,
                            Value * /*NegatedRoot*/>;

+  std::array<Value *, 2> getSortedOperandsOfBinOp(Instruction *I);
+
   LLVM_NODISCARD Value *visitImpl(Value *V, unsigned Depth);

   LLVM_NODISCARD Value *negate(Value *V, unsigned Depth);

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
index bfecfe98a590f..b321eab01d7af 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -115,6 +115,19 @@ Negator::~Negator() {
 }
 #endif

+// Due to InstCombine's worklist management, there are no guarantees that
+// each instruction we'll encounter has been visited by InstCombine already.
+// In particular, most importantly for us, that means we have to canonicalize
+// constants to RHS ourselves, since that is helpful sometimes.
+std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
+  assert(I->getNumOperands() == 2 && "Only for binops!");
+  std::array<Value *, 2> Ops{I->getOperand(0), I->getOperand(1)};
+  if (I->isCommutative() && InstCombiner::getComplexity(I->getOperand(0)) <
+                                InstCombiner::getComplexity(I->getOperand(1)))
+    std::swap(Ops[0], Ops[1]);
+  return Ops;
+}
+
 // FIXME: can this be reworked into a worklist-based algorithm while preserving
 // the depth-first, early bailout traversal?
 LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
@@ -159,11 +172,13 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {

   // In some cases we can give the answer without further recursion.
   switch (I->getOpcode()) {
-  case Instruction::Add:
+  case Instruction::Add: {
+    std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I);
     // `inc` is always negatible.
-    if (match(I->getOperand(1), m_One()))
-      return Builder.CreateNot(I->getOperand(0), I->getName() + ".neg");
+    if (match(Ops[1], m_One()))
+      return Builder.CreateNot(Ops[0], I->getName() + ".neg");
     break;
+  }
   case Instruction::Xor:
     // `not` is always negatible.
     if (match(I, m_Not(m_Value(X))))
@@ -344,16 +359,18 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
         ConstantExpr::getShl(Constant::getAllOnesValue(Op1C->getType()), Op1C),
         I->getName() + ".neg");
   }
-  case Instruction::Or:
+  case Instruction::Or: {
     if (!haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL, &AC, I,
                              &DT))
       return nullptr; // Don't know how to handle `or` in general.
+    std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I);
     // `or`/`add` are interchangeable when operands have no common bits set.
     // `inc` is always negatible.
-    if (match(I->getOperand(1), m_One()))
-      return Builder.CreateNot(I->getOperand(0), I->getName() + ".neg");
+    if (match(Ops[1], m_One()))
+      return Builder.CreateNot(Ops[0], I->getName() + ".neg");
     // Else, just defer to Instruction::Add handling.
     LLVM_FALLTHROUGH;
+  }
   case Instruction::Add: {
     // `add` is negatible if both of its operands are negatible.
     SmallVector<Value *, 2> NegatedOps, NonNegatedOps;
@@ -383,26 +400,29 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
     return Builder.CreateSub(NegatedOps[0], NonNegatedOps[0],
                              I->getName() + ".neg");
   }
-  case Instruction::Xor:
+  case Instruction::Xor: {
+    std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I);
     // `xor` is negatible if one of its operands is invertible.
     // FIXME: InstCombineInverter? But how to connect Inverter and Negator?
-    if (auto *C = dyn_cast<Constant>(I->getOperand(1))) {
-      Value *Xor = Builder.CreateXor(I->getOperand(0), ConstantExpr::getNot(C));
+    if (auto *C = dyn_cast<Constant>(Ops[1])) {
+      Value *Xor = Builder.CreateXor(Ops[0], ConstantExpr::getNot(C));
       return Builder.CreateAdd(Xor, ConstantInt::get(Xor->getType(), 1),
                                I->getName() + ".neg");
     }
     return nullptr;
+  }
   case Instruction::Mul: {
+    std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I);
     // `mul` is negatible if one of its operands is negatible.
     Value *NegatedOp, *OtherOp;
     // First try the second operand, in case it's a constant it will be best to
     // just invert it instead of sinking the `neg` deeper.
-    if (Value *NegOp1 = negate(I->getOperand(1), Depth + 1)) {
+    if (Value *NegOp1 = negate(Ops[1], Depth + 1)) {
       NegatedOp = NegOp1;
-      OtherOp = I->getOperand(0);
-    } else if (Value *NegOp0 = negate(I->getOperand(0), Depth + 1)) {
+      OtherOp = Ops[0];
+    } else if (Value *NegOp0 = negate(Ops[0], Depth + 1)) {
       NegatedOp = NegOp0;
-      OtherOp = I->getOperand(1);
+      OtherOp = Ops[1];
     } else // Can't negate either of them.
       return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
index 92d1ac225a01b..3d988c9f3d4a8 100644
--- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
+++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
@@ -1239,5 +1239,29 @@ define i4 @negate_freeze_extrause(i4 %x, i4 %y, i4 %z) {
   ret i4 %t2
 }

+; Due to InstCombine's worklist management, there are no guarantees that
+; each instruction we'll encounter has been visited by InstCombine already.
+; In particular, most importantly for us, that means we have to canonicalize
+; constants to RHS ourselves, since that is helpful sometimes.
+; This used to cause an endless combine loop.
+define void @noncanonical_mul_with_constant_as_first_operand() {
+; CHECK-LABEL: @noncanonical_mul_with_constant_as_first_operand(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[IF_END]]
+;
+entry:
+  br label %if.end
+
+if.end:
+  %e.0 = phi i32 [ undef, %entry ], [ %div, %if.end ]
+  %conv = trunc i32 %e.0 to i16
+  %mul.i = mul nsw i16 -1, %conv
+  %conv1 = sext i16 %mul.i to i32
+  %div = sub nsw i32 0, %conv1
+  br label %if.end
+}
+
 ; CHECK: !0 = !{!"branch_weights", i32 40, i32 1}
 !0 = !{!"branch_weights", i32 40, i32 1}

From 6e6a5acf005681d6b6815c0618d0d263ef8397fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?=
Date: Tue, 6 Oct 2020 13:03:49 +0300
Subject: [PATCH 216/321] [LLD] [MinGW] Move option definitions to alphabetical order, wrap a line. NFC.
---
 lld/MinGW/Options.td | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td
index d9f64c40ac53d..0604b458193cf 100644
--- a/lld/MinGW/Options.td
+++ b/lld/MinGW/Options.td
@@ -80,7 +80,10 @@ def strip_all: F<"strip-all">,
     HelpText<"Omit all symbol information from the output binary">;
 def strip_debug: F<"strip-debug">,
     HelpText<"Omit all debug information, but keep symbol information">;
-defm reproduce: Eq<"reproduce", "Write a tar file containing input files and command line options to reproduce link">;
+defm reproduce: Eq<"reproduce",
+    "Write a tar file containing input files and command line options to reproduce link">;
+defm require_defined: Eq<"require-defined",
+    "Force symbol to be added to symbol table as an undefined one">;
 defm undefined: Eq<"undefined", "Include symbol in the link, if available">;
 defm whole_archive: B<"whole-archive",
     "Include all object files for following archives",
@@ -88,8 +91,6 @@ defm whole_archive: B<"whole-archive",
 def v: Flag<["-"], "v">, HelpText<"Display the version number">;
 def verbose: F<"verbose">, HelpText<"Verbose mode">;
 def version: F<"version">, HelpText<"Display the version number and exit">;
-defm require_defined: Eq<"require-defined",
-    "Force symbol to be added to symbol table as an undefined one">;

 // LLD specific options
 def _HASH_HASH_HASH : Flag<["-"], "###">,

From 7b5dfb400a67f03122b43cd5d59b8b1ef6d00147 Mon Sep 17 00:00:00 2001
From: Alex Zinenko
Date: Wed, 7 Oct 2020 14:38:10 +0200
Subject: [PATCH 217/321] [mlir] Add support for diagnostics in C API.

Add basic support for registering diagnostic handlers with the context
(actually, the diagnostic engine contained in the context) and
processing diagnostic messages from the C API.

Reviewed By: stellaraccident

Differential Revision: https://reviews.llvm.org/D88736
---
 mlir/include/mlir-c/Diagnostics.h | 87 ++++++++++++++++++++++++++++
 mlir/include/mlir-c/Support.h | 36 ++++++++++++
 mlir/include/mlir/CAPI/Diagnostics.h | 28 +++++++++
 mlir/include/mlir/CAPI/Support.h | 11 ++++
 mlir/lib/CAPI/IR/CMakeLists.txt | 1 +
 mlir/lib/CAPI/IR/Diagnostics.cpp | 75 ++++++++++++++++++++++++
 mlir/test/CAPI/ir.c | 36 +++++++++++-
 7 files changed, 273 insertions(+), 1 deletion(-)
 create mode 100644 mlir/include/mlir-c/Diagnostics.h
 create mode 100644 mlir/include/mlir/CAPI/Diagnostics.h
 create mode 100644 mlir/lib/CAPI/IR/Diagnostics.cpp

diff --git a/mlir/include/mlir-c/Diagnostics.h b/mlir/include/mlir-c/Diagnostics.h
new file mode 100644
index 0000000000000..40ea6d8c405a8
--- /dev/null
+++ b/mlir/include/mlir-c/Diagnostics.h
@@ -0,0 +1,87 @@
+/*===-- mlir-c/Diagnostics.h - MLIR Diagnostic subsystem C API ----*- C -*-===*\
+|* *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C APIs accessing MLIR Diagnostics subsystem. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef MLIR_C_DIAGNOSTICS_H
+#define MLIR_C_DIAGNOSTICS_H
+
+#include "mlir-c/IR.h"
+#include "mlir-c/Support.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** An opaque reference to a diagnostic, always owned by the diagnostics engine
+ * (context). Must not be stored outside of the diagnostic handler. */
+struct MlirDiagnostic {
+  void *ptr;
+};
+typedef struct MlirDiagnostic MlirDiagnostic;
+
+/** Severity of a diagnostic. */
+enum MlirDiagnosticSeverity {
+  MlirDiagnosticError,
+  MlirDiagnosticWarning,
+  MlirDiagnosticNote,
+  MlirDiagnosticRemark
+};
+typedef enum MlirDiagnosticSeverity MlirDiagnosticSeverity;
+
+/** Opaque identifier of a diagnostic handler, useful to detach a handler. */
+typedef uint64_t MlirDiagnosticHandlerID;
+
+/** Diagnostic handler type. Accepts a reference to a diagnostic, which is only
+ * guaranteed to be live during the call. If the handler processed the
+ * diagnostic completely, it is expected to return success. Otherwise, it is
+ * expected to return failure to indicate that other handlers should attempt to
+ * process the diagnostic. */
+typedef MlirLogicalResult (*MlirDiagnosticHandler)(MlirDiagnostic);
+
+/** Prints a diagnostic using the provided callback. */
+void mlirDiagnosticPrint(MlirDiagnostic diagnostic, MlirStringCallback callback,
+                         void *userData);
+
+/** Returns the location at which the diagnostic is reported. */
+MlirLocation mlirDiagnosticGetLocation(MlirDiagnostic diagnostic);
+
+/** Returns the severity of the diagnostic. */
+MlirDiagnosticSeverity mlirDiagnosticGetSeverity(MlirDiagnostic diagnostic);
+
+/** Returns the number of notes attached to the diagnostic. */
+intptr_t mlirDiagnosticGetNumNotes(MlirDiagnostic diagnostic);
+
+/** Returns `pos`-th note attached to the diagnostic. Expects `pos` to be a
+ * valid zero-based index into the list of notes. */
+MlirDiagnostic mlirDiagnosticGetNote(MlirDiagnostic diagnostic, intptr_t pos);
+
+/** Attaches the diagnostic handler to the context. Handlers are invoked in the
+ * reverse order of attachment until one of them processes the diagnostic
+ * completely. Returns an identifier that can be used to detach the handler. */
+MlirDiagnosticHandlerID
+mlirContextAttachDiagnosticHandler(MlirContext context,
+                                   MlirDiagnosticHandler handler);
+
+/** Detaches an attached diagnostic handler from the context given its
+ * identifier. */
+void mlirContextDetachDiagnosticHandler(MlirContext context,
+                                        MlirDiagnosticHandlerID id);
+
+/** Emits an error at the given location through the diagnostics engine. Used
+ * for testing purposes. */
+void mlirEmitError(MlirLocation location, const char *message);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MLIR_C_DIAGNOSTICS_H
diff --git a/mlir/include/mlir-c/Support.h b/mlir/include/mlir-c/Support.h
index 1039c68c09bf0..accf6cc0eab8e 100644
--- a/mlir/include/mlir-c/Support.h
+++ b/mlir/include/mlir-c/Support.h
@@ -50,6 +50,42 @@ inline MlirStringRef mlirStringRefCreate(const char *str, size_t length) {
  */
 MlirStringRef mlirStringRefCreateFromCString(const char *str);

+/*============================================================================*/
+/* MlirLogicalResult. */
+/*============================================================================*/
+
+/** A logical result value, essentially a boolean with named states. LLVM
+ * convention for using boolean values to designate success or failure of an
+ * operation is a moving target, so MLIR opted for an explicit class.
+ * Instances of MlirLogicalResult must only be inspected using the associated
+ * functions. */
+struct MlirLogicalResult {
+  int8_t value;
+};
+typedef struct MlirLogicalResult MlirLogicalResult;
+
+/** Checks if the given logical result represents a success. */
+inline int mlirLogicalResultIsSuccess(MlirLogicalResult res) {
+  return res.value != 0;
+}
+
+/** Checks if the given logical result represents a failure. */
+inline int mlirLogicalResultIsFailure(MlirLogicalResult res) {
+  return res.value == 0;
+}
+
+/** Creates a logical result representing a success. */
+inline static MlirLogicalResult mlirLogicalResultSuccess() {
+  MlirLogicalResult res = {1};
+  return res;
+}
+
+/** Creates a logical result representing a failure. */
+inline static MlirLogicalResult mlirLogicalResultFailure() {
+  MlirLogicalResult res = {0};
+  return res;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/mlir/include/mlir/CAPI/Diagnostics.h b/mlir/include/mlir/CAPI/Diagnostics.h
new file mode 100644
index 0000000000000..a632b9b99499b
--- /dev/null
+++ b/mlir/include/mlir/CAPI/Diagnostics.h
@@ -0,0 +1,28 @@
+//===- Diagnostics.h - C API Utils for MLIR Diagnostics ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CAPI_DIAGNOSTICS_H
+#define MLIR_CAPI_DIAGNOSTICS_H
+
+#include "mlir-c/Diagnostics.h"
+#include <cassert>
+
+namespace mlir {
+class Diagnostic;
+} // namespace mlir
+
+inline mlir::Diagnostic &unwrap(MlirDiagnostic diagnostic) {
+  assert(diagnostic.ptr && "unexpected null diagnostic");
+  return *(static_cast<mlir::Diagnostic *>(diagnostic.ptr));
+}
+
+inline MlirDiagnostic wrap(mlir::Diagnostic &diagnostic) {
+  return {&diagnostic};
+}
+
+#endif // MLIR_CAPI_DIAGNOSTICS_H
diff --git a/mlir/include/mlir/CAPI/Support.h b/mlir/include/mlir/CAPI/Support.h
index 0c2b069906657..6d9a59abf111f 100644
--- a/mlir/include/mlir/CAPI/Support.h
+++ b/mlir/include/mlir/CAPI/Support.h
@@ -16,6 +16,7 @@
 #define MLIR_CAPI_SUPPORT_H

 #include "mlir-c/Support.h"
+#include "mlir/Support/LogicalResult.h"
 #include "llvm/ADT/StringRef.h"

 /// Converts a StringRef into its MLIR C API equivalent.
@@ -28,4 +29,14 @@ inline llvm::StringRef unwrap(MlirStringRef ref) {
   return llvm::StringRef(ref.data, ref.length);
 }

+inline MlirLogicalResult wrap(mlir::LogicalResult res) {
+  if (mlir::succeeded(res))
+    return mlirLogicalResultSuccess();
+  return mlirLogicalResultFailure();
+}
+
+inline mlir::LogicalResult unwrap(MlirLogicalResult res) {
+  return mlir::success(mlirLogicalResultIsSuccess(res));
+}
+
 #endif // MLIR_CAPI_SUPPORT_H
diff --git a/mlir/lib/CAPI/IR/CMakeLists.txt b/mlir/lib/CAPI/IR/CMakeLists.txt
index 4158a4c96efd0..cf285fd06e605 100644
--- a/mlir/lib/CAPI/IR/CMakeLists.txt
+++ b/mlir/lib/CAPI/IR/CMakeLists.txt
@@ -1,6 +1,7 @@
 # Main API.
 add_mlir_library(MLIRCAPIIR
   AffineMap.cpp
+  Diagnostics.cpp
   IR.cpp
   StandardAttributes.cpp
   StandardTypes.cpp
diff --git a/mlir/lib/CAPI/IR/Diagnostics.cpp b/mlir/lib/CAPI/IR/Diagnostics.cpp
new file mode 100644
index 0000000000000..9595446f9d5ab
--- /dev/null
+++ b/mlir/lib/CAPI/IR/Diagnostics.cpp
@@ -0,0 +1,75 @@
+//===- Diagnostics.cpp - C Interface for MLIR Diagnostics -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir-c/Diagnostics.h"
+#include "mlir/CAPI/Diagnostics.h"
+#include "mlir/CAPI/IR.h"
+#include "mlir/CAPI/Support.h"
+#include "mlir/CAPI/Utils.h"
+#include "mlir/IR/Diagnostics.h"
+
+using namespace mlir;
+
+void mlirDiagnosticPrint(MlirDiagnostic diagnostic, MlirStringCallback callback,
+                         void *userData) {
+  detail::CallbackOstream stream(callback, userData);
+  unwrap(diagnostic).print(stream);
+  stream.flush();
+}
+
+MlirLocation mlirDiagnosticGetLocation(MlirDiagnostic diagnostic) {
+  return wrap(unwrap(diagnostic).getLocation());
+}
+
+MlirDiagnosticSeverity mlirDiagnosticGetSeverity(MlirDiagnostic diagnostic) {
+  switch (unwrap(diagnostic).getSeverity()) {
+  case mlir::DiagnosticSeverity::Error:
+    return MlirDiagnosticError;
+  case mlir::DiagnosticSeverity::Warning:
+    return MlirDiagnosticWarning;
+  case mlir::DiagnosticSeverity::Note:
+    return MlirDiagnosticNote;
+  case mlir::DiagnosticSeverity::Remark:
+    return MlirDiagnosticRemark;
+  }
+  llvm_unreachable("unhandled diagnostic severity");
+}
+
+// Notes are stored in a vector, so note iterator range is a pair of
+// random access iterators, for which it is cheap to compute the size.
+intptr_t mlirDiagnosticGetNumNotes(MlirDiagnostic diagnostic) {
+  return static_cast<intptr_t>(llvm::size(unwrap(diagnostic).getNotes()));
+}
+
+// Notes are stored in a vector, so the iterator is a random access iterator,
+// cheap to advance multiple steps at a time.
+MlirDiagnostic mlirDiagnosticGetNote(MlirDiagnostic diagnostic, intptr_t pos) {
+  return wrap(*std::next(unwrap(diagnostic).getNotes().begin(), pos));
+}
+
+MlirDiagnosticHandlerID
+mlirContextAttachDiagnosticHandler(MlirContext context,
+                                   MlirDiagnosticHandler handler) {
+  assert(handler && "unexpected null diagnostic handler");
+  DiagnosticEngine::HandlerID id =
+      unwrap(context)->getDiagEngine().registerHandler(
+          [handler](Diagnostic &diagnostic) {
+            return unwrap(handler(wrap(diagnostic)));
+          });
+  return static_cast<MlirDiagnosticHandlerID>(id);
+}
+
+void mlirContextDetachDiagnosticHandler(MlirContext context,
+                                        MlirDiagnosticHandlerID id) {
+  unwrap(context)->getDiagEngine().eraseHandler(
+      static_cast<DiagnosticEngine::HandlerID>(id));
+}
+
+void mlirEmitError(MlirLocation location, const char *message) {
+  emitError(unwrap(location)) << message;
+}
diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c
index ae60d56a22ed8..18c4e8b085597 100644
--- a/mlir/test/CAPI/ir.c
+++ b/mlir/test/CAPI/ir.c
@@ -10,8 +10,9 @@

 /* RUN: mlir-capi-ir-test 2>&1 | FileCheck %s
  */

-#include "mlir-c/IR.h"
 #include "mlir-c/AffineMap.h"
+#include "mlir-c/Diagnostics.h"
+#include "mlir-c/IR.h"
 #include "mlir-c/Registration.h"
 #include "mlir-c/StandardAttributes.h"
 #include "mlir-c/StandardDialect.h"
@@ -827,6 +828,28 @@ int registerOnlyStd() {
   return 0;
 }

+// Wraps a diagnostic into additional text we can match against.
+MlirLogicalResult errorHandler(MlirDiagnostic diagnostic) { + fprintf(stderr, "processing diagnostic <<\n"); + mlirDiagnosticPrint(diagnostic, printToStderr, NULL); + fprintf(stderr, "\n"); + MlirLocation loc = mlirDiagnosticGetLocation(diagnostic); + mlirLocationPrint(loc, printToStderr, NULL); + assert(mlirDiagnosticGetNumNotes(diagnostic) == 0); + fprintf(stderr, ">> end of diagnostic\n"); + return mlirLogicalResultSuccess(); +} + +void testDiagnostics() { + MlirContext ctx = mlirContextCreate(); + MlirDiagnosticHandlerID id = + mlirContextAttachDiagnosticHandler(ctx, errorHandler); + MlirLocation loc = mlirLocationUnknownGet(ctx); + mlirEmitError(loc, "test diagnostics"); + mlirContextDetachDiagnosticHandler(ctx, id); + mlirEmitError(loc, "more test diagnostics"); +} + int main() { MlirContext ctx = mlirContextCreate(); mlirRegisterAllDialects(ctx); @@ -982,5 +1005,16 @@ int main() { mlirContextDestroy(ctx); + fprintf(stderr, "@test_diagnostics\n"); + testDiagnostics(); + // clang-format off + // CHECK-LABEL: @test_diagnostics + // CHECK: processing diagnostic << + // CHECK: test diagnostics + // CHECK: loc(unknown) + // CHECK: >> end of diagnostic + // CHECK-NOT: processing diagnostic + // CHECK: more test diagnostics + return 0; } From 602c193e2aeb19f5d2e48075281601a2386e8758 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 5 Oct 2020 16:16:13 -0400 Subject: [PATCH 218/321] [libc++] Make sure __clear_and_shrink() maintains string invariants __clear_and_shrink() was added in D41976, and a test was added alongside it to make sure that the string invariants were maintained. However, it appears that the test never ran under UBSan before, which would have highlighted the fact that it doesn't actually maintain the string invariants. Differential Revision: https://reviews.llvm.org/D88849 --- libcxx/include/string | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libcxx/include/string b/libcxx/include/string index b6380da95c64b..c900d9609ed40 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -3941,7 +3941,7 @@ basic_string<_CharT, _Traits, _Allocator>::__invariants() const return false; if (data() == 0) return false; - if (data()[size()] != value_type(0)) + if (data()[size()] != value_type()) return false; return true; } @@ -3959,6 +3959,7 @@ basic_string<_CharT, _Traits, _Allocator>::__clear_and_shrink() _NOEXCEPT __alloc_traits::deallocate(__alloc(), __get_long_pointer(), capacity() + 1); __set_long_cap(0); __set_short_size(0); + traits_type::assign(*__get_short_pointer(), value_type()); } } From ce1365f8f7e3d8297d74afafe7cbbda442a25c50 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 6 Oct 2020 16:46:58 -0400 Subject: [PATCH 219/321] [libc++] Add a CMake option to control whether the debug mode is supported Some libc++ builds may want to disable support for the debug mode, for example to reduce code size or because the current implementation of the debug mode requires a global map. This commit adds the LIBCXX_ENABLE_DEBUG_MODE CMake option and ties it into the test suite. It also adds a CI job to test this configuration going forward. 
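For a rough picture of what stays available when the debug mode is kept on,
here is a sketch only, mirroring the _LIBCPP_ASSERT override pattern the
tests below use; it is not part of this patch, and it assumes a libc++ built
with LIBCXX_ENABLE_DEBUG_MODE=ON:

    // Compile with -D_LIBCPP_DEBUG=1. The macro override turns a failed
    // debug assertion into a clean exit(0), as the test suite does; without
    // the debug mode, the out-of-bounds access below would simply be
    // undefined behavior.
    #define _LIBCPP_DEBUG 1
    #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))

    #include <cstdlib>
    #include <vector>

    int main() {
      std::vector<int> v;
      return v[0]; // caught by the debug-mode bounds check
    }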
Differential Revision: https://reviews.llvm.org/D88923 --- libcxx/CMakeLists.txt | 5 +++++ libcxx/cmake/caches/Apple.cmake | 1 + libcxx/cmake/caches/Generic-nodebug.cmake | 1 + libcxx/src/CMakeLists.txt | 5 ++++- libcxx/test/CMakeLists.txt | 1 + libcxx/test/configs/legacy.cfg.in | 1 + .../sequences/array/array.zero/db_back.pass.cpp | 4 +--- .../sequences/array/array.zero/db_front.pass.cpp | 4 +--- .../sequences/array/array.zero/db_indexing.pass.cpp | 4 +--- .../containers/sequences/list/list.cons/db_copy.pass.cpp | 4 ++-- .../containers/sequences/list/list.cons/db_move.pass.cpp | 8 +++----- .../sequences/list/list.modifiers/emplace_db1.pass.cpp | 8 +++----- .../sequences/list/list.modifiers/erase_iter_db1.pass.cpp | 5 ++--- .../sequences/list/list.modifiers/erase_iter_db2.pass.cpp | 5 ++--- .../list/list.modifiers/erase_iter_iter_db1.pass.cpp | 5 ++--- .../list/list.modifiers/erase_iter_iter_db2.pass.cpp | 5 ++--- .../list/list.modifiers/erase_iter_iter_db3.pass.cpp | 5 ++--- .../list/list.modifiers/erase_iter_iter_db4.pass.cpp | 5 ++--- .../list.modifiers/insert_iter_iter_iter_db1.pass.cpp | 4 +--- .../list/list.modifiers/insert_iter_rvalue_db1.pass.cpp | 5 ++--- .../list.modifiers/insert_iter_size_value_db1.pass.cpp | 5 ++--- .../list/list.modifiers/insert_iter_value_db1.pass.cpp | 5 ++--- .../sequences/list/list.modifiers/pop_back_db1.pass.cpp | 5 ++--- .../sequences/list/list.ops/db_splice_pos_list.pass.cpp | 5 ++--- .../list/list.ops/db_splice_pos_list_iter.pass.cpp | 5 ++--- .../list/list.ops/db_splice_pos_list_iter_iter.pass.cpp | 5 ++--- .../libcxx/containers/sequences/vector/db_back.pass.cpp | 3 +-- .../libcxx/containers/sequences/vector/db_cback.pass.cpp | 3 +-- .../libcxx/containers/sequences/vector/db_cfront.pass.cpp | 3 +-- .../libcxx/containers/sequences/vector/db_cindex.pass.cpp | 3 +-- .../libcxx/containers/sequences/vector/db_front.pass.cpp | 3 +-- .../libcxx/containers/sequences/vector/db_index.pass.cpp | 3 +-- .../containers/sequences/vector/db_iterators_2.pass.cpp | 3 +-- .../containers/sequences/vector/db_iterators_3.pass.cpp | 3 +-- .../containers/sequences/vector/db_iterators_4.pass.cpp | 3 +-- .../containers/sequences/vector/db_iterators_5.pass.cpp | 3 +-- .../containers/sequences/vector/db_iterators_6.pass.cpp | 3 +-- .../containers/sequences/vector/db_iterators_7.pass.cpp | 3 +-- .../containers/sequences/vector/db_iterators_8.pass.cpp | 3 +-- .../containers/sequences/vector/pop_back_empty.pass.cpp | 3 +-- .../libcxx/containers/unord/unord.map/db_bucket.pass.cpp | 3 +-- .../unord/unord.map/db_insert_hint_const_lvalue.pass.cpp | 3 +-- .../unord/unord.map/db_insert_hint_rvalue.pass.cpp | 3 +-- .../containers/unord/unord.map/db_iterators_7.pass.cpp | 3 +-- .../containers/unord/unord.map/db_iterators_8.pass.cpp | 3 +-- .../unord/unord.map/db_local_iterators_7.pass.cpp | 3 +-- .../unord/unord.map/db_local_iterators_8.pass.cpp | 3 +-- .../libcxx/containers/unord/unord.map/db_move.pass.cpp | 4 +--- .../unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp | 3 +-- .../unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp | 3 +-- .../unord.map.modifiers/erase_iter_iter_db1.pass.cpp | 3 +-- .../unord.map.modifiers/erase_iter_iter_db2.pass.cpp | 3 +-- .../unord.map.modifiers/erase_iter_iter_db3.pass.cpp | 3 +-- .../unord.map.modifiers/erase_iter_iter_db4.pass.cpp | 3 +-- .../unord/unord.map/unord.map.swap/db_swap_1.pass.cpp | 3 +-- .../unord.multimap/db_insert_hint_const_lvalue.pass.cpp | 3 +-- .../unord/unord.multimap/db_insert_hint_rvalue.pass.cpp | 3 +-- 
.../unord/unord.multimap/db_iterators_7.pass.cpp | 3 +-- .../unord/unord.multimap/db_iterators_8.pass.cpp | 3 +-- .../unord/unord.multimap/db_local_iterators_7.pass.cpp | 3 +-- .../unord/unord.multimap/db_local_iterators_8.pass.cpp | 3 +-- .../containers/unord/unord.multimap/db_move.pass.cpp | 3 +-- .../unord.multimap.modifiers/erase_iter_db1.pass.cpp | 3 +-- .../unord.multimap.modifiers/erase_iter_db2.pass.cpp | 3 +-- .../unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp | 3 +-- .../unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp | 3 +-- .../unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp | 3 +-- .../unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp | 3 +-- .../unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp | 3 +-- .../unord.multiset/db_insert_hint_const_lvalue.pass.cpp | 3 +-- .../unord/unord.multiset/db_iterators_7.pass.cpp | 3 +-- .../unord/unord.multiset/db_iterators_8.pass.cpp | 3 +-- .../unord/unord.multiset/db_local_iterators_7.pass.cpp | 3 +-- .../unord/unord.multiset/db_local_iterators_8.pass.cpp | 3 +-- .../containers/unord/unord.multiset/db_move.pass.cpp | 3 +-- .../unord/unord.multiset/erase_iter_db1.pass.cpp | 3 +-- .../unord/unord.multiset/erase_iter_db2.pass.cpp | 3 +-- .../unord/unord.multiset/erase_iter_iter_db1.pass.cpp | 3 +-- .../unord/unord.multiset/erase_iter_iter_db2.pass.cpp | 3 +-- .../unord/unord.multiset/erase_iter_iter_db3.pass.cpp | 3 +-- .../unord/unord.multiset/erase_iter_iter_db4.pass.cpp | 3 +-- .../unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp | 3 +-- .../unord/unord.set/db_insert_hint_const_lvalue.pass.cpp | 3 +-- .../containers/unord/unord.set/db_iterators_7.pass.cpp | 3 +-- .../containers/unord/unord.set/db_iterators_8.pass.cpp | 3 +-- .../unord/unord.set/db_local_iterators_7.pass.cpp | 3 +-- .../unord/unord.set/db_local_iterators_8.pass.cpp | 3 +-- .../libcxx/containers/unord/unord.set/db_move.pass.cpp | 4 +--- .../containers/unord/unord.set/erase_iter_db1.pass.cpp | 3 +-- .../containers/unord/unord.set/erase_iter_db2.pass.cpp | 3 +-- .../unord/unord.set/erase_iter_iter_db1.pass.cpp | 3 +-- .../unord/unord.set/erase_iter_iter_db2.pass.cpp | 3 +-- .../unord/unord.set/erase_iter_iter_db3.pass.cpp | 3 +-- .../unord/unord.set/erase_iter_iter_db4.pass.cpp | 3 +-- .../unord/unord.set/unord.set.swap/db_swap_1.pass.cpp | 3 +-- .../containers/db_associative_container_tests.pass.cpp | 5 ++--- .../db_sequence_container_iterators.multithread.pass.cpp | 5 ++--- .../containers/db_sequence_container_iterators.pass.cpp | 5 ++--- libcxx/test/libcxx/debug/containers/db_string.pass.cpp | 5 ++--- .../debug/containers/db_unord_container_tests.pass.cpp | 5 ++--- libcxx/test/libcxx/debug/db_string_view.pass.cpp | 5 ++--- libcxx/test/libcxx/debug/debug_abort.pass.cpp | 4 +--- libcxx/test/libcxx/debug/debug_helper_test.pass.cpp | 4 +--- libcxx/test/libcxx/debug/debug_register.pass.cpp | 4 +--- .../filesystems/class.path/path.itr/iterator_db.pass.cpp | 4 +--- libcxx/test/libcxx/iterators/advance.debug1.pass.cpp | 3 +-- libcxx/test/libcxx/iterators/next.debug1.pass.cpp | 3 +-- libcxx/test/libcxx/iterators/prev.debug1.pass.cpp | 4 +--- .../strings/basic.string/string.access/db_back.pass.cpp | 3 +-- .../strings/basic.string/string.access/db_cback.pass.cpp | 3 +-- .../strings/basic.string/string.access/db_cfront.pass.cpp | 3 +-- .../strings/basic.string/string.access/db_cindex.pass.cpp | 3 +-- .../strings/basic.string/string.access/db_front.pass.cpp | 3 +-- .../strings/basic.string/string.access/db_index.pass.cpp | 3 +-- 
.../basic.string/string.iterators/db_iterators_2.pass.cpp | 3 +-- .../basic.string/string.iterators/db_iterators_3.pass.cpp | 3 +-- .../basic.string/string.iterators/db_iterators_4.pass.cpp | 3 +-- .../basic.string/string.iterators/db_iterators_5.pass.cpp | 3 +-- .../basic.string/string.iterators/db_iterators_6.pass.cpp | 3 +-- .../basic.string/string.iterators/db_iterators_7.pass.cpp | 3 +-- .../basic.string/string.iterators/db_iterators_8.pass.cpp | 3 +-- .../string.modifiers/clear_and_shrink_db1.pass.cpp | 3 +-- .../basic.string/string.modifiers/erase_iter_db1.pass.cpp | 3 +-- .../basic.string/string.modifiers/erase_iter_db2.pass.cpp | 3 +-- .../string.modifiers/erase_iter_iter_db1.pass.cpp | 3 +-- .../string.modifiers/erase_iter_iter_db2.pass.cpp | 3 +-- .../string.modifiers/erase_iter_iter_db3.pass.cpp | 3 +-- .../string.modifiers/erase_iter_iter_db4.pass.cpp | 3 +-- .../string.modifiers/erase_pop_back_db1.pass.cpp | 3 +-- .../string.modifiers/insert_iter_char_db1.pass.cpp | 3 +-- .../string.modifiers/insert_iter_iter_iter_db1.pass.cpp | 3 +-- .../string.modifiers/insert_iter_size_char_db1.pass.cpp | 3 +-- .../thread/futures/futures.promise/set_exception.pass.cpp | 4 +--- .../futures.promise/set_exception_at_thread_exit.pass.cpp | 4 +--- libcxx/utils/ci/buildkite-pipeline.yml | 5 +++++ libcxx/utils/ci/macos-backdeployment.sh | 1 + libcxx/utils/ci/run-buildbot.sh | 6 ++++++ libcxx/utils/libcxx/test/params.py | 6 +++++- 138 files changed, 182 insertions(+), 296 deletions(-) create mode 100644 libcxx/cmake/caches/Generic-nodebug.cmake diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 8e7df5d19610e..8599e5d2cc997 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -94,6 +94,11 @@ option(LIBCXX_ENABLE_FILESYSTEM "Build filesystem as part of the main libc++ lib ${ENABLE_FILESYSTEM_DEFAULT}) option(LIBCXX_INCLUDE_TESTS "Build the libc++ tests." ${LLVM_INCLUDE_TESTS}) option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." OFF) +option(LIBCXX_ENABLE_DEBUG_MODE + "Whether to include support for libc++'s debugging mode in the library. + By default, this is turned on. If you turn it off and try to enable the + debug mode when compiling a program against libc++, it will fail to link + since the required support isn't provided in the library." ON) option(LIBCXX_TEST_GDB_PRETTY_PRINTERS "Test gdb pretty printers." 
OFF) set(LIBCXX_TEST_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test/configs/legacy.cfg.in" CACHE STRING "The Lit testing configuration to use when running the tests.") diff --git a/libcxx/cmake/caches/Apple.cmake b/libcxx/cmake/caches/Apple.cmake index 622a3af84f2bc..cab7c1407d63e 100644 --- a/libcxx/cmake/caches/Apple.cmake +++ b/libcxx/cmake/caches/Apple.cmake @@ -11,6 +11,7 @@ set(LIBCXX_TYPEINFO_COMPARISON_IMPLEMENTATION "1" CACHE STRING "") set(LIBCXX_CXX_ABI libcxxabi CACHE STRING "") set(LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "") set(LIBCXX_HIDE_FROM_ABI_PER_TU_BY_DEFAULT ON CACHE BOOL "") +set(LIBCXX_ENABLE_DEBUG_MODE OFF CACHE BOOL "") set(LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS ON CACHE BOOL "") set(LIBCXXABI_ENABLE_PIC OFF CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-nodebug.cmake b/libcxx/cmake/caches/Generic-nodebug.cmake new file mode 100644 index 0000000000000..b301b2ef1c7d3 --- /dev/null +++ b/libcxx/cmake/caches/Generic-nodebug.cmake @@ -0,0 +1 @@ +set(LIBCXX_ENABLE_DEBUG_MODE OFF CACHE BOOL "") diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 0e6819369ffa1..97e6e226b1ac3 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -11,7 +11,6 @@ set(LIBCXX_SOURCES chrono.cpp condition_variable.cpp condition_variable_destructor.cpp - debug.cpp exception.cpp functional.cpp future.cpp @@ -56,6 +55,10 @@ set(LIBCXX_SOURCES vector.cpp ) +if (LIBCXX_ENABLE_DEBUG_MODE) + list(APPEND LIBCXX_SOURCES debug.cpp) +endif() + if(WIN32) list(APPEND LIBCXX_SOURCES support/win32/locale_win32.cpp diff --git a/libcxx/test/CMakeLists.txt b/libcxx/test/CMakeLists.txt index b06984fc0ba93..e2e3382b779a4 100644 --- a/libcxx/test/CMakeLists.txt +++ b/libcxx/test/CMakeLists.txt @@ -70,6 +70,7 @@ pythonize_bool(LIBCXX_HAS_ATOMIC_LIB) pythonize_bool(LIBCXX_HAVE_CXX_ATOMICS_WITH_LIB) pythonize_bool(LIBCXX_BUILD_EXTERNAL_THREAD_LIBRARY) pythonize_bool(LIBCXX_DEBUG_BUILD) +pythonize_bool(LIBCXX_ENABLE_DEBUG_MODE) pythonize_bool(LIBCXX_ENABLE_PARALLEL_ALGORITHMS) # By default, for non-standalone builds, libcxx and libcxxabi share a library diff --git a/libcxx/test/configs/legacy.cfg.in b/libcxx/test/configs/legacy.cfg.in index efb41a93e41b9..4bfc9bc69a7d9 100644 --- a/libcxx/test/configs/legacy.cfg.in +++ b/libcxx/test/configs/legacy.cfg.in @@ -9,6 +9,7 @@ config.libcxx_src_root = "@LIBCXX_SOURCE_DIR@" config.libcxx_obj_root = "@LIBCXX_BINARY_DIR@" config.cxx_library_root = "@LIBCXX_LIBRARY_DIR@" config.enable_exceptions = @LIBCXX_ENABLE_EXCEPTIONS@ +config.enable_debug_tests = @LIBCXX_ENABLE_DEBUG_MODE@ config.enable_experimental = @LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY@ config.enable_filesystem = @LIBCXX_ENABLE_FILESYSTEM@ config.enable_rtti = @LIBCXX_ENABLE_RTTI@ diff --git a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_back.pass.cpp b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_back.pass.cpp index 8c50962953671..24610386c9b99 100644 --- a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_back.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_back.pass.cpp @@ -9,9 +9,7 @@ // UNSUPPORTED: windows // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 - -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode // test array::front() raises a debug error. 
diff --git a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_front.pass.cpp b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_front.pass.cpp
index 22c766fd67e9c..48715b6300b79 100644
--- a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_front.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_front.pass.cpp
@@ -9,9 +9,7 @@
 // UNSUPPORTED: windows
 // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1
-
-// Can't test the system lib because this test enables debug mode
-// UNSUPPORTED: with_system_cxx_lib=macosx
+// UNSUPPORTED: libcxx-no-debug-mode

 // test array::front() raises a debug error.
diff --git a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_indexing.pass.cpp b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_indexing.pass.cpp
index 26bbea77a208d..a8eda0115d4b2 100644
--- a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_indexing.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_indexing.pass.cpp
@@ -9,9 +9,7 @@
 // UNSUPPORTED: windows
 // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1
-
-// Can't test the system lib because this test enables debug mode
-// UNSUPPORTED: with_system_cxx_lib=macosx
+// UNSUPPORTED: libcxx-no-debug-mode

 // test array::operator[] raises a debug error.
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.cons/db_copy.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.cons/db_copy.pass.cpp
index ae4964ff51aa3..aaf3760590ef9 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.cons/db_copy.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.cons/db_copy.pass.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Can't test the system lib because this test enables debug mode
-// UNSUPPORTED: with_system_cxx_lib=macosx

 // list(list&& c);

+// UNSUPPORTED: libcxx-no-debug-mode
+
 #define _LIBCPP_DEBUG 1
 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0))
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.cons/db_move.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.cons/db_move.pass.cpp
index 3592c866f8620..08cd297cf700c 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.cons/db_move.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.cons/db_move.pass.cpp
@@ -6,15 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03
-
-// Can't test the system lib because this test enables debug mode
-// UNSUPPORTED: with_system_cxx_lib=macosx
-
 //
 // list(list&& c);

+// UNSUPPORTED: c++03
+// UNSUPPORTED: libcxx-no-debug-mode
+
 #define _LIBCPP_DEBUG 1
 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(1))
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/emplace_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/emplace_db1.pass.cpp
index 00e3e7d9ee336..6b9cb48cb40a8 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/emplace_db1.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/emplace_db1.pass.cpp
@@ -6,15 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03
-
-// Can't test the system lib because this test enables debug mode
-// UNSUPPORTED: with_system_cxx_lib=macosx
-
 //
 // template <class... Args> void emplace(const_iterator p, Args&&...
args); +// UNSUPPORTED: c++03 +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db1.pass.cpp index 63a6a4421507e..65f99f781c552 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db1.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // // Call erase(const_iterator position) with end() +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db2.pass.cpp index 8a785e20b6187..915f25af33078 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db2.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // // Call erase(const_iterator position) with iterator from another container +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db1.pass.cpp index 3c0188f8eb907..e39d7350debca 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db1.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // // Call erase(const_iterator first, const_iterator last); with first iterator from another container +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db2.pass.cpp index 24026d57d3d95..adaac382e64f4 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db2.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // // Call erase(const_iterator first, const_iterator last); with second iterator from another container +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db3.pass.cpp index fc7088f5888a4..76113d5e405c2 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db3.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // <list> // Call erase(const_iterator first, const_iterator last); with both iterators from another container +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db4.pass.cpp index feca2d06ffda7..5254865b3bdbf 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db4.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // <list> // Call erase(const_iterator first, const_iterator last); with a bad range +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_iter_iter_db1.pass.cpp index aa63556f1e0b5..a4d207720700c 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_iter_iter_db1.pass.cpp @@ -6,14 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // <list> // template <class Iter> // iterator insert(const_iterator position, Iter first, Iter last); +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_rvalue_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_rvalue_db1.pass.cpp index 71e43cdeff2e7..9f126b06b5543 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_rvalue_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_rvalue_db1.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // <list> // iterator insert(const_iterator position, value_type&& x); +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ?
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_size_value_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_size_value_db1.pass.cpp index 10e9cebad9284..c5b4c66042074 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_size_value_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_size_value_db1.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // <list> // iterator insert(const_iterator position, size_type n, const value_type& x); +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_value_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_value_db1.pass.cpp index 9a2c989e1e4be..083dbc5fd4da8 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_value_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_value_db1.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // <list> // iterator insert(const_iterator position, const value_type& x); +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/pop_back_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/pop_back_db1.pass.cpp index 4b7e1778eb83f..34e5f403c5638 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/pop_back_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/pop_back_db1.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // <list> // void pop_back(); +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list.pass.cpp index a32364ff2d2fa..6dcfa6002088b 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // <list> // void splice(const_iterator position, list& x); +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ?
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter.pass.cpp index de0c6ce9444e4..1e1ab73b0ece2 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // <list> // void splice(const_iterator position, list& x, iterator i); +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter_iter.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter_iter.pass.cpp index d4da6d55136d9..4e0c440a6e267 100644 --- a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter_iter.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter_iter.pass.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // <list> // void splice(const_iterator position, list& x, iterator first, iterator last); +// UNSUPPORTED: libcxx-no-debug-mode + #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_back.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_back.pass.cpp index 60056d9824071..32138788185f8 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_back.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_back.pass.cpp @@ -10,8 +10,7 @@ // Call back() on empty container. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_cback.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_cback.pass.cpp index d038e2987e105..91bb9829bd4dc 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_cback.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_cback.pass.cpp @@ -10,8 +10,7 @@ // Call back() on empty const container. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_cfront.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_cfront.pass.cpp index 7175a0930043d..aa3c8906496a8 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_cfront.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_cfront.pass.cpp @@ -10,8 +10,7 @@ // Call front() on empty const container. -// This test requires debug mode, which the library on macOS doesn't have.
-// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_cindex.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_cindex.pass.cpp index 9c094b90afb43..066961cf8b934 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_cindex.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_cindex.pass.cpp @@ -10,8 +10,7 @@ // Index const vector out of bounds. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_front.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_front.pass.cpp index b68fdf829371f..a8300b0879644 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_front.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_front.pass.cpp @@ -10,8 +10,7 @@ // Call front() on empty container. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_index.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_index.pass.cpp index 3796969775f64..69e9c127ff1d3 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_index.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_index.pass.cpp @@ -10,8 +10,7 @@ // Index vector out of bounds. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_2.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_2.pass.cpp index b1a1c5aef26a7..91e1cf88383e5 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_2.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_2.pass.cpp @@ -10,8 +10,7 @@ // Compare iterators from different containers with <. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_3.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_3.pass.cpp index 45e6b2641d395..91701c7a696d8 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_3.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_3.pass.cpp @@ -10,8 +10,7 @@ // Subtract iterators from different containers. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_4.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_4.pass.cpp index ae62fabdbdaf2..bb2a1277b540e 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_4.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_4.pass.cpp @@ -10,8 +10,7 @@ // Index iterator out of bounds. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_5.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_5.pass.cpp index 330e8dd3210d4..cca03c4c791ab 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_5.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_5.pass.cpp @@ -10,8 +10,7 @@ // Add to iterator out of bounds. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_6.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_6.pass.cpp index 97b406f5338fe..bbc28e97ab3bc 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_6.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_6.pass.cpp @@ -10,8 +10,7 @@ // Decrement iterator prior to begin. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_7.pass.cpp index 7dbee2134a59f..eaeb4e1b58dcc 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_7.pass.cpp @@ -10,8 +10,7 @@ // Increment iterator past end. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_8.pass.cpp index 0754aaee597ee..e3900186f72b0 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/db_iterators_8.pass.cpp @@ -10,8 +10,7 @@ // Dereference non-dereferenceable iterator. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp index 32ab5f65ddc15..e1afe30d35ccc 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp @@ -10,8 +10,7 @@ // pop_back() more than the number of elements in a vector -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_bucket.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_bucket.pass.cpp index 242b43912fb06..fc14f0e62521c 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_bucket.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_bucket.pass.cpp @@ -10,8 +10,7 @@ // size_type bucket(const key_type& __k) const; -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_const_lvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_const_lvalue.pass.cpp index 5c6c51fa7ade5..6abe0e799c4d4 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_const_lvalue.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_const_lvalue.pass.cpp @@ -10,8 +10,7 @@ // iterator insert(const_iterator p, const value_type& x); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_rvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_rvalue.pass.cpp index 83ac37948fa3e..09c9ce374dfac 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_rvalue.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_insert_hint_rvalue.pass.cpp @@ -14,8 +14,7 @@ // class = typename enable_if<is_convertible<P, value_type>::value>::type> // iterator insert(const_iterator p, P&& x); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_7.pass.cpp index 513c56034a685..814d8bbf0dfe1 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_7.pass.cpp @@ -10,8 +10,7 @@ // Increment iterator past end. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ?
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_8.pass.cpp index f12ba00cabc8d..2ef8fd62a827a 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_iterators_8.pass.cpp @@ -10,8 +10,7 @@ // Dereference non-dereferenceable iterator. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_7.pass.cpp index 19b0ee7c2674c..9902f19ca9b7a 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_7.pass.cpp @@ -10,8 +10,7 @@ // Increment local_iterator past end. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_8.pass.cpp index d696d54336a99..849593173809a 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_local_iterators_8.pass.cpp @@ -10,8 +10,7 @@ // Dereference non-dereferenceable iterator. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/db_move.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/db_move.pass.cpp index 5ae9a1403c493..5ccd7aa0d685e 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/db_move.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/db_move.pass.cpp @@ -11,9 +11,7 @@ // unordered_map(unordered_map&& u); // UNSUPPORTED: c++03 - -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp index a3873ec8c5e99..df3e838b9133b 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator position) with end() -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp index 7aa39f2000cbe..47c7b5cf022b4 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator position) with iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp index 841b90073c551..3f7fd6611a4d4 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with first iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp index b124a94b7f593..f2a3b4b8a7735 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with second iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp index c61cfde6620c4..7b39a336a34f5 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with both iterators from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp index 4a485c3ce7340..e120deb16edf8 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with a bad range -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp index 3e01d659417fe..1985f25402fb6 100644 --- a/libcxx/test/libcxx/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp @@ -14,8 +14,7 @@ // void swap(unordered_map& x, unordered_map& y); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_const_lvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_const_lvalue.pass.cpp index de8b504f10e97..08c244c0ef826 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_const_lvalue.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_const_lvalue.pass.cpp @@ -10,8 +10,7 @@ // iterator insert(const_iterator p, const value_type& x); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_rvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_rvalue.pass.cpp index 47bfb4b87924a..cf3cfec3e710e 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_rvalue.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_insert_hint_rvalue.pass.cpp @@ -14,8 +14,7 @@ // class = typename enable_if<is_convertible<P, value_type>::value>::type> // iterator insert(const_iterator p, P&& x); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_7.pass.cpp index 117883020f57b..1192e1f4638c1 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_7.pass.cpp @@ -10,8 +10,7 @@ // Increment iterator past end. -// This test requires debug mode, which the library on macOS doesn't have.
-// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_8.pass.cpp index a5861fb8bad96..846028d8d9346 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_iterators_8.pass.cpp @@ -10,8 +10,7 @@ // Dereference non-dereferenceable iterator. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp index a817f8108c980..c14e17122d36c 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp @@ -10,8 +10,7 @@ // Increment local_iterator past end. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp index 9ac363e096805..46e91db56436e 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp @@ -10,8 +10,7 @@ // Dereference non-dereferenceable iterator. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/db_move.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/db_move.pass.cpp index 3b1f23ac91a46..a4fb6e5e6acb0 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/db_move.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/db_move.pass.cpp @@ -12,8 +12,7 @@ // unordered_multimap(unordered_multimap&& u); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp index da9362270a64f..36867de07d1f4 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator position) with end() -// This test requires debug mode, which the library on macOS doesn't have. 
-// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp index 0e99ca4cefa7c..47a3c0d25e436 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator position) with iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp index f8412d94e4560..5e8a6cec046ef 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with first iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp index a028e11390d91..1865c790b6642 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with second iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp index 5506af55707d8..44c0befb08934 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with both iterators from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp index 97119b843b2cc..1c3dca426d1dc 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with a bad range -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp index 73d9dc311fb2f..d2627f0e1c99e 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp @@ -14,8 +14,7 @@ // void swap(unordered_multimap& x, unordered_multimap& y); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/db_insert_hint_const_lvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_insert_hint_const_lvalue.pass.cpp index de604c1aca3cd..29bc62e6bd0a2 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/db_insert_hint_const_lvalue.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_insert_hint_const_lvalue.pass.cpp @@ -10,8 +10,7 @@ // iterator insert(const_iterator p, const value_type& x); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_7.pass.cpp index 89d3a5737d782..a813c23ed03e6 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_7.pass.cpp @@ -10,8 +10,7 @@ // Increment iterator past end. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_8.pass.cpp index 579bd84c97191..5bdab4d86b687 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_iterators_8.pass.cpp @@ -10,8 +10,7 @@ // Dereference non-dereferenceable iterator. -// This test requires debug mode, which the library on macOS doesn't have. 
-// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp index c85ed1da83551..4ad003f7ed9ab 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp @@ -10,8 +10,7 @@ // Increment local_iterator past end. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp index 597edd0fd521b..512912d70d850 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp @@ -10,8 +10,7 @@ // Dereference non-dereferenceable iterator. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/db_move.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/db_move.pass.cpp index 41da7ea4169e2..bc72a3bcfd15e 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/db_move.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/db_move.pass.cpp @@ -12,8 +12,7 @@ // unordered_multiset(unordered_multiset&& u); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db1.pass.cpp index b967c59a676f4..2aa205917c59f 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db1.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator position) with end() -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db2.pass.cpp index d704dcaa78063..fd09b2a0748e0 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_db2.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator position) with iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. 
-// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp index 1a183e6658cab..7b680071caf36 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with first iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp index de3ebaaac647a..a3202457bd5c7 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with second iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp index 9d36b53f51ffd..b28abfb9b573f 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with both iterators from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp index a68f312b80162..e8234cdefb903 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with a bad range -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp index 2feba5c47f531..a0f23aeef85dc 100644 --- a/libcxx/test/libcxx/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp @@ -14,8 +14,7 @@ // void swap(unordered_multiset& x, unordered_multiset& y); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/db_insert_hint_const_lvalue.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_insert_hint_const_lvalue.pass.cpp index 3303d089970f1..70b096c7b8124 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/db_insert_hint_const_lvalue.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_insert_hint_const_lvalue.pass.cpp @@ -10,8 +10,7 @@ // iterator insert(const_iterator p, const value_type& x); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_7.pass.cpp index 12e56ea1bce8c..90221a7801a1c 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_7.pass.cpp @@ -10,8 +10,7 @@ // Increment iterator past end. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_8.pass.cpp index 3333be8959fdc..3fb1c86bf063c 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_iterators_8.pass.cpp @@ -10,8 +10,7 @@ // Dereference non-dereferenceable iterator. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_7.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_7.pass.cpp index f003c2b2d763c..f506f423d1329 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_7.pass.cpp @@ -10,8 +10,7 @@ // Increment local_iterator past end. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_8.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_8.pass.cpp index 999ec8b1e48be..fe998c539dd59 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_local_iterators_8.pass.cpp @@ -10,8 +10,7 @@ // Dereference non-dereferenceable iterator. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/db_move.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/db_move.pass.cpp index 02f8368805289..5737270b04628 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/db_move.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/db_move.pass.cpp @@ -7,14 +7,12 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 +// UNSUPPORTED: libcxx-no-debug-mode // <unordered_set> // unordered_set(unordered_set&& u); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx - #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db1.pass.cpp index b5ddd8ca1b680..91314832dd9d7 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db1.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator position) with end() -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db2.pass.cpp index bd14a3c0e6ca2..3763f01b65a74 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_db2.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator position) with iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp index 70a1afb9c4a17..40824dd12d82b 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with first iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ?
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp index 88f33d5d03be9..c492eee6e74db 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with second iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp index 8aa1b5a9390ad..a9e21fc4985f0 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with both iterators from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp index 0922c65865d4c..666e9b4889710 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with a bad range -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp index b65cc69c9e72f..d66e7e3f491e5 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp @@ -14,8 +14,7 @@ // void swap(unordered_set& x, unordered_set& y); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/debug/containers/db_associative_container_tests.pass.cpp b/libcxx/test/libcxx/debug/containers/db_associative_container_tests.pass.cpp index d64daad2de400..afda15e871816 100644 --- a/libcxx/test/libcxx/debug/containers/db_associative_container_tests.pass.cpp +++ b/libcxx/test/libcxx/debug/containers/db_associative_container_tests.pass.cpp @@ -9,10 +9,9 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: windows // UNSUPPORTED: libcpp-no-if-constexpr -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 +// UNSUPPORTED: libcxx-no-debug-mode // test container debugging diff --git a/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.multithread.pass.cpp b/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.multithread.pass.cpp index bdfe3115a0c54..954fb1f69270b 100644 --- a/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.multithread.pass.cpp +++ b/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.multithread.pass.cpp @@ -10,10 +10,9 @@ // UNSUPPORTED: windows // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: libcpp-no-if-constexpr -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 +// UNSUPPORTED: libcxx-no-debug-mode // test multihtreaded container debugging diff --git a/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.pass.cpp b/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.pass.cpp index adfd2d884b4e8..74a4ea504de5b 100644 --- a/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.pass.cpp +++ b/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.pass.cpp @@ -9,10 +9,9 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: windows // UNSUPPORTED: libcpp-no-if-constexpr -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 +// UNSUPPORTED: libcxx-no-debug-mode // test container debugging diff --git a/libcxx/test/libcxx/debug/containers/db_string.pass.cpp b/libcxx/test/libcxx/debug/containers/db_string.pass.cpp index 46e484be5b942..293a37c0d726a 100644 --- a/libcxx/test/libcxx/debug/containers/db_string.pass.cpp +++ b/libcxx/test/libcxx/debug/containers/db_string.pass.cpp @@ -9,10 +9,9 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: windows // UNSUPPORTED: libcpp-no-if-constexpr -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 +// UNSUPPORTED: libcxx-no-debug-mode // test container debugging diff --git a/libcxx/test/libcxx/debug/containers/db_unord_container_tests.pass.cpp b/libcxx/test/libcxx/debug/containers/db_unord_container_tests.pass.cpp index 3e57641795a96..6d4809a2c9324 100644 --- a/libcxx/test/libcxx/debug/containers/db_unord_container_tests.pass.cpp +++ b/libcxx/test/libcxx/debug/containers/db_unord_container_tests.pass.cpp @@ -9,10 +9,9 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: windows // UNSUPPORTED: libcpp-no-if-constexpr -// 
ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 +// UNSUPPORTED: libcxx-no-debug-mode // test container debugging diff --git a/libcxx/test/libcxx/debug/db_string_view.pass.cpp b/libcxx/test/libcxx/debug/db_string_view.pass.cpp index 4fca18f792ca1..c2285f09d3cd3 100644 --- a/libcxx/test/libcxx/debug/db_string_view.pass.cpp +++ b/libcxx/test/libcxx/debug/db_string_view.pass.cpp @@ -9,10 +9,9 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: windows // UNSUPPORTED: libcpp-no-if-constexpr -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 +// UNSUPPORTED: libcxx-no-debug-mode // test container debugging diff --git a/libcxx/test/libcxx/debug/debug_abort.pass.cpp b/libcxx/test/libcxx/debug/debug_abort.pass.cpp index 5d3c61223e2b5..277628f97ca05 100644 --- a/libcxx/test/libcxx/debug/debug_abort.pass.cpp +++ b/libcxx/test/libcxx/debug/debug_abort.pass.cpp @@ -8,9 +8,7 @@ //===----------------------------------------------------------------------===// // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=0 - -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode // Test that the default debug handler aborts the program. diff --git a/libcxx/test/libcxx/debug/debug_helper_test.pass.cpp b/libcxx/test/libcxx/debug/debug_helper_test.pass.cpp index 01f42a9e9d6e5..f6153b204e699 100644 --- a/libcxx/test/libcxx/debug/debug_helper_test.pass.cpp +++ b/libcxx/test/libcxx/debug/debug_helper_test.pass.cpp @@ -10,10 +10,8 @@ // UNSUPPORTED: c++03 // UNSUPPORTED: windows -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 +// UNSUPPORTED: libcxx-no-debug-mode #include <__debug> #include "test_macros.h" diff --git a/libcxx/test/libcxx/debug/debug_register.pass.cpp b/libcxx/test/libcxx/debug/debug_register.pass.cpp index d8d8021040ed3..829f35429bb01 100644 --- a/libcxx/test/libcxx/debug/debug_register.pass.cpp +++ b/libcxx/test/libcxx/debug/debug_register.pass.cpp @@ -8,9 +8,7 @@ //===----------------------------------------------------------------------===// // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 - -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #include #include diff --git a/libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/iterator_db.pass.cpp b/libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/iterator_db.pass.cpp index 559b8752b9273..8e25ac835e815 100644 --- a/libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/iterator_db.pass.cpp +++ b/libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/iterator_db.pass.cpp @@ -8,12 +8,10 @@ // UNSUPPORTED: c++03 // UNSUPPORTED: windows +// UNSUPPORTED: libcxx-no-debug-mode // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=0 -// This test requires debug mode, which the library on macOS doesn't have. 
-// UNSUPPORTED: with_system_cxx_lib=macosx - // // class path diff --git a/libcxx/test/libcxx/iterators/advance.debug1.pass.cpp b/libcxx/test/libcxx/iterators/advance.debug1.pass.cpp index 9c180da10f04f..3b71f7f06255e 100644 --- a/libcxx/test/libcxx/iterators/advance.debug1.pass.cpp +++ b/libcxx/test/libcxx/iterators/advance.debug1.pass.cpp @@ -6,11 +6,10 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode // UNSUPPORTED: c++03 // UNSUPPORTED: windows -// UNSUPPORTED: with_system_cxx_lib=macosx // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=0 +// UNSUPPORTED: libcxx-no-debug-mode // diff --git a/libcxx/test/libcxx/iterators/next.debug1.pass.cpp b/libcxx/test/libcxx/iterators/next.debug1.pass.cpp index cf272a0c3aff6..e2eaf586bedd8 100644 --- a/libcxx/test/libcxx/iterators/next.debug1.pass.cpp +++ b/libcxx/test/libcxx/iterators/next.debug1.pass.cpp @@ -6,11 +6,10 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode // UNSUPPORTED: c++03 // UNSUPPORTED: windows -// UNSUPPORTED: with_system_cxx_lib=macosx // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=0 +// UNSUPPORTED: libcxx-no-debug-mode // diff --git a/libcxx/test/libcxx/iterators/prev.debug1.pass.cpp b/libcxx/test/libcxx/iterators/prev.debug1.pass.cpp index e929164b32ef6..35d54491c7d9c 100644 --- a/libcxx/test/libcxx/iterators/prev.debug1.pass.cpp +++ b/libcxx/test/libcxx/iterators/prev.debug1.pass.cpp @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// -// Can't test the system lib because this test enables debug mode -// UNSUPPORTED: with_system_cxx_lib=macosx - // UNSUPPORTED: c++03 // UNSUPPORTED: windows // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=0 +// UNSUPPORTED: libcxx-no-debug-mode // diff --git a/libcxx/test/libcxx/strings/basic.string/string.access/db_back.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_back.pass.cpp index 31000d092799c..ad85e741031f3 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.access/db_back.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_back.pass.cpp @@ -10,8 +10,7 @@ // Call back() on empty container. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.access/db_cback.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_cback.pass.cpp index 61d504f9bf3e9..2a6880993016e 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.access/db_cback.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_cback.pass.cpp @@ -10,8 +10,7 @@ // Call back() on empty const container. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.access/db_cfront.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_cfront.pass.cpp index 654c575d5b263..3ec6f14df8ca4 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.access/db_cfront.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_cfront.pass.cpp @@ -10,8 +10,7 @@ // Call front() on empty const container. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.access/db_cindex.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_cindex.pass.cpp index 2a5267eef875c..826a36b9ee442 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.access/db_cindex.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_cindex.pass.cpp @@ -10,8 +10,7 @@ // Index const string out of bounds. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.access/db_front.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_front.pass.cpp index c73c536df6c03..b37a42beb6e17 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.access/db_front.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_front.pass.cpp @@ -10,8 +10,7 @@ // Call front() on empty container. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.access/db_index.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.access/db_index.pass.cpp index ef250b01b3c82..d47fe5461387f 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.access/db_index.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.access/db_index.pass.cpp @@ -10,8 +10,7 @@ // Index string out of bounds. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_2.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_2.pass.cpp index df165b70f5d74..969f3243bab22 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_2.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_2.pass.cpp @@ -10,8 +10,7 @@ // Compare iterators from different containers with <. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_3.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_3.pass.cpp index 9f5146689f659..cd6bdcee0c3c3 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_3.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_3.pass.cpp @@ -10,8 +10,7 @@ // Subtract iterators from different containers with <. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_4.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_4.pass.cpp index 28aa876ce537b..89a4f0d472b09 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_4.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_4.pass.cpp @@ -10,8 +10,7 @@ // Index iterator out of bounds. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_5.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_5.pass.cpp index 9fd4dec8aa0c7..a1098674ab114 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_5.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_5.pass.cpp @@ -10,8 +10,7 @@ // Add to iterator out of bounds. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_6.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_6.pass.cpp index 802d6b4d986a7..cbc14c13daa5a 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_6.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_6.pass.cpp @@ -10,8 +10,7 @@ // Decrement iterator prior to begin. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_7.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_7.pass.cpp index 86b175f2e4f1c..156f53701f1da 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_7.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_7.pass.cpp @@ -10,8 +10,7 @@ // Increment iterator past end. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_8.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_8.pass.cpp index ca3521de189d5..f308a751dbde1 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_8.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/db_iterators_8.pass.cpp @@ -10,8 +10,7 @@ // Dereference non-dereferenceable iterator. -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp index 5369a8238f9ab..345ad46c43b63 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp @@ -10,8 +10,7 @@ // Call __clear_and_shrink() and ensure string invariants hold -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db1.pass.cpp index f9dd19c2ce7c9..501db8a841f49 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db1.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator position) with end() -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db2.pass.cpp index 3e1b5fc952821..58b2c65ca1a59 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_db2.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator position) with iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db1.pass.cpp index ce0690f93ca8c..e0f229a6448d3 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db1.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with first iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. 
-// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db2.pass.cpp index 87e2f50389f55..c5faed89caf20 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db2.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db2.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with second iterator from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db3.pass.cpp index 848f34447f0fa..5b0179c0da940 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db3.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db3.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with both iterators from another container -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db4.pass.cpp index cb87f1f3769c4..827dddeae2f5e 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db4.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_iter_iter_db4.pass.cpp @@ -10,8 +10,7 @@ // Call erase(const_iterator first, const_iterator last); with a bad range -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_pop_back_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_pop_back_db1.pass.cpp index af93f57aae061..88fcccfbaef76 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_pop_back_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/erase_pop_back_db1.pass.cpp @@ -10,8 +10,7 @@ // void pop_back(); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? 
(void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp index e5814888d9fa7..5364da5c5bb46 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp @@ -10,8 +10,7 @@ // iterator insert(const_iterator p, charT c); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_iter_iter_db1.pass.cpp index 5bbe1468bf35f..87e7fb6835f5a 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_iter_iter_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_iter_iter_db1.pass.cpp @@ -11,8 +11,7 @@ // template // iterator insert(const_iterator p, InputIterator first, InputIterator last); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp index 2be5b082ed3d5..eff9fe515baed 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp @@ -10,8 +10,7 @@ // iterator insert(const_iterator p, size_type n, charT c); -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx +// UNSUPPORTED: libcxx-no-debug-mode #define _LIBCPP_DEBUG 1 #define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : std::exit(0)) diff --git a/libcxx/test/libcxx/thread/futures/futures.promise/set_exception.pass.cpp b/libcxx/test/libcxx/thread/futures/futures.promise/set_exception.pass.cpp index efa62f546b583..a79fdab4017e6 100644 --- a/libcxx/test/libcxx/thread/futures/futures.promise/set_exception.pass.cpp +++ b/libcxx/test/libcxx/thread/futures/futures.promise/set_exception.pass.cpp @@ -9,12 +9,10 @@ // UNSUPPORTED: windows // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: c++03 +// UNSUPPORTED: libcxx-no-debug-mode // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=0 -// This test requires debug mode, which the library on macOS doesn't have. 
-// UNSUPPORTED: with_system_cxx_lib=macosx - // // class promise diff --git a/libcxx/test/libcxx/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp b/libcxx/test/libcxx/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp index a72c4bc17db43..407b69fc63daf 100644 --- a/libcxx/test/libcxx/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp +++ b/libcxx/test/libcxx/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp @@ -9,12 +9,10 @@ // UNSUPPORTED: windows // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: c++03 +// UNSUPPORTED: libcxx-no-debug-mode // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=0 -// This test requires debug mode, which the library on macOS doesn't have. -// UNSUPPORTED: with_system_cxx_lib=macosx - // // class promise diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index 63fb5d5f8f424..929ee98be4640 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -75,6 +75,11 @@ steps: agents: queue: "libcxx-builders" + - label: "No debug mode" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-nodebug | libcxx/utils/ci/phabricator-report" + agents: + queue: "libcxx-builders" + - label: "MacOS C++20" command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-cxx2a | libcxx/utils/ci/phabricator-report" agents: diff --git a/libcxx/utils/ci/macos-backdeployment.sh b/libcxx/utils/ci/macos-backdeployment.sh index 04549aa346456..f91d7197e9e5a 100755 --- a/libcxx/utils/ci/macos-backdeployment.sh +++ b/libcxx/utils/ci/macos-backdeployment.sh @@ -131,6 +131,7 @@ fi echo "@@@ Running tests for libc++ @@@" "${LLVM_BUILD_DIR}/bin/llvm-lit" -sv "${MONOREPO_ROOT}/libcxx/test" \ --param=enable_experimental=false \ + --param=enable_debug_tests=false \ ${ENABLE_FILESYSTEM} \ --param=cxx_headers="${LLVM_INSTALL_DIR}/include/c++/v1" \ --param=std="${STD}" \ diff --git a/libcxx/utils/ci/run-buildbot.sh b/libcxx/utils/ci/run-buildbot.sh index 0dee6ae75737f..c62b1a935ad29 100755 --- a/libcxx/utils/ci/run-buildbot.sh +++ b/libcxx/utils/ci/run-buildbot.sh @@ -102,6 +102,12 @@ generic-singlethreaded) args+=("-DLIBCXXABI_ENABLE_THREADS=OFF") args+=("-DLIBCXX_ENABLE_MONOTONIC_CLOCK=OFF") ;; +generic-nodebug) + export CC=clang + export CXX=clang++ + args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-DLIBCXX_ENABLE_DEBUG_MODE=OFF") +;; x86_64-apple-system) export CC=clang export CXX=clang++ diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index 175074a169b63..94686a66b05a8 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -45,6 +45,10 @@ Feature(name='c++experimental', linkFlag='-lc++experimental')), Parameter(name='long_tests', choices=[True, False], type=bool, default=True, - help="Whether to tests that take longer to run. This can be useful when running on a very slow device.", + help="Whether to enable tests that take longer to run. 
This can be useful when running on a very slow device.", feature=lambda enabled: Feature(name='long_tests') if enabled else None), + + Parameter(name='enable_debug_tests', choices=[True, False], type=bool, default=True, + help="Whether to enable tests that exercise the libc++ debugging mode.", + feature=lambda enabled: None if enabled else Feature(name='libcxx-no-debug-mode')), ] From 029290f1a6231507b82981d56c0a0d2b02d508e0 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 6 Oct 2020 17:39:08 +0200 Subject: [PATCH 220/321] [lldb/docs] Clarify python/swig version incompatibility The problematic combo is a debug python>=3.7 && swig<4.0. Differential Revision: https://reviews.llvm.org/D88906 --- lldb/docs/resources/build.rst | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lldb/docs/resources/build.rst b/lldb/docs/resources/build.rst index e22db7f6d8f9a..b4e58ca977a9d 100644 --- a/lldb/docs/resources/build.rst +++ b/lldb/docs/resources/build.rst @@ -76,6 +76,12 @@ commands below. > pkgin install swig python27 cmake ninja-build > brew install swig cmake ninja +Note that there's an `incompatibility +` between Python version 3.7 and later +and swig versions older than 4.0.0 which makes builds of LLDB using debug +versions of python unusable. This primarily affects Windows, as debug builds of +LLDB must use debug python as well. + Windows ******* @@ -83,10 +89,9 @@ Windows * The latest Windows SDK. * The Active Template Library (ATL). * `GnuWin32 `_ for CoreUtils and Make. -* `Python 3.6 or 3.8 `_. Python 3.7 - is known to be incompatible. Make sure to (1) get the x64 variant if that's - what you're targetting and (2) install the debug library if you want to build - a debug lldb. +* `Python 3 `_. Make sure to (1) get + the x64 variant if that's what you're targetting and (2) install the debug + library if you want to build a debug lldb. * `Python Tools for Visual Studio `_. If you plan to debug test failures or even write new tests at all, PTVS is an indispensable debugging From 3dfb94986170c57d9b3f5f2cba039a2eab5e6f13 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Fri, 2 Oct 2020 13:38:09 +0200 Subject: [PATCH 221/321] [lldb] Check for and use ptsname_r if available ptsname is not thread-safe. ptsname_r is available on most (but not all) systems -- use it preferentially. In the patch I also improve the thread-safety of the ptsname fallback path by wrapping it in a mutex. This should guarantee the safety of a typical ptsname implementation using a single static buffer, as long as all callers go through this function. I also remove the error arguments, as the only way this function can fail is if the "primary" fd is not valid. This is a programmer error as this requirement is documented, and all callers ensure that is the case. 
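For reference, the shape of the change as a hedged standalone sketch (illustrative function name, not the exact patch code; assumes ptsname/ptsname_r come from POSIX <stdlib.h>, which may need _GNU_SOURCE on glibc, and a cmake-defined HAVE_PTSNAME_R like the one added below):

#include <cassert>
#include <limits.h> // PATH_MAX
#include <mutex>
#include <stdlib.h> // ptsname(), ptsname_r()
#include <string>

std::string SecondaryName(int primary_fd) {
  // Documented precondition: the primary side is already open and valid.
  assert(primary_fd >= 0);
#if HAVE_PTSNAME_R
  // ptsname_r fills a caller-owned buffer, so no locking is needed.
  char buf[PATH_MAX];
  int rc = ptsname_r(primary_fd, buf, sizeof(buf));
  assert(rc == 0);
  (void)rc;
  return buf;
#else
  // ptsname typically returns a pointer into a single static buffer;
  // serialize callers and copy the result into the returned std::string
  // while still holding the lock.
  static std::mutex m;
  std::lock_guard<std::mutex> guard(m);
  const char *name = ptsname(primary_fd);
  assert(name != nullptr);
  return name;
#endif
}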
Differential Revision: https://reviews.llvm.org/D88728 --- lldb/cmake/modules/LLDBGenerateConfig.cmake | 1 + lldb/include/lldb/Host/Config.h.cmake | 2 + lldb/include/lldb/Host/PseudoTerminal.h | 13 +---- lldb/source/Host/common/ProcessLaunchInfo.cpp | 2 +- lldb/source/Host/common/PseudoTerminal.cpp | 58 +++++++------------ ...latformiOSSimulatorCoreSimulatorSupport.mm | 32 +++++----- .../Process/FreeBSD/ProcessFreeBSD.cpp | 3 +- .../Process/gdb-remote/ProcessGDBRemote.cpp | 2 +- 8 files changed, 43 insertions(+), 70 deletions(-) diff --git a/lldb/cmake/modules/LLDBGenerateConfig.cmake b/lldb/cmake/modules/LLDBGenerateConfig.cmake index 0d3a7fdb1816b..caeb3969002bb 100644 --- a/lldb/cmake/modules/LLDBGenerateConfig.cmake +++ b/lldb/cmake/modules/LLDBGenerateConfig.cmake @@ -7,6 +7,7 @@ include(CheckLibraryExists) set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) check_symbol_exists(ppoll poll.h HAVE_PPOLL) +check_symbol_exists(ptsname_r stdlib.h HAVE_PTSNAME_R) set(CMAKE_REQUIRED_DEFINITIONS) check_symbol_exists(sigaction signal.h HAVE_SIGACTION) check_cxx_symbol_exists(accept4 "sys/socket.h" HAVE_ACCEPT4) diff --git a/lldb/include/lldb/Host/Config.h.cmake b/lldb/include/lldb/Host/Config.h.cmake index 7467f429b6287..671d71d1c4e32 100644 --- a/lldb/include/lldb/Host/Config.h.cmake +++ b/lldb/include/lldb/Host/Config.h.cmake @@ -20,6 +20,8 @@ #cmakedefine01 HAVE_PPOLL +#cmakedefine01 HAVE_PTSNAME_R + #cmakedefine01 HAVE_SIGACTION #cmakedefine01 HAVE_PROCESS_VM_READV diff --git a/lldb/include/lldb/Host/PseudoTerminal.h b/lldb/include/lldb/Host/PseudoTerminal.h index 8a5a233e77484..c2258f15e5510 100644 --- a/lldb/include/lldb/Host/PseudoTerminal.h +++ b/lldb/include/lldb/Host/PseudoTerminal.h @@ -105,20 +105,11 @@ class PseudoTerminal { /// A primary pseudo terminal should already be valid prior to /// calling this function. /// - /// \param[out] error_str - /// An pointer to an error that can describe any errors that - /// occur. This can be NULL if no error status is desired. - /// /// \return - /// The name of the secondary pseudo terminal as a NULL terminated - /// C. This string that comes from static memory, so a copy of - /// the string should be made as subsequent calls can change - /// this value. NULL is returned if this object doesn't have - /// a valid primary pseudo terminal opened or if the call to - /// \c ptsname() fails. + /// The name of the secondary pseudo terminal. /// /// \see PseudoTerminal::OpenFirstAvailablePrimary() - const char *GetSecondaryName(char *error_str, size_t error_len) const; + std::string GetSecondaryName() const; /// Open the first available pseudo terminal. 
/// diff --git a/lldb/source/Host/common/ProcessLaunchInfo.cpp b/lldb/source/Host/common/ProcessLaunchInfo.cpp index 4bc8cda7a0067..a4729a28ce74c 100644 --- a/lldb/source/Host/common/ProcessLaunchInfo.cpp +++ b/lldb/source/Host/common/ProcessLaunchInfo.cpp @@ -222,7 +222,7 @@ llvm::Error ProcessLaunchInfo::SetUpPtyRedirection() { return llvm::createStringError(llvm::inconvertibleErrorCode(), "PTY::OpenFirstAvailablePrimary failed"); } - const FileSpec secondary_file_spec(m_pty->GetSecondaryName(nullptr, 0)); + const FileSpec secondary_file_spec(m_pty->GetSecondaryName()); // Only use the secondary tty if we don't have anything specified for // input and don't have an action for stdin diff --git a/lldb/source/Host/common/PseudoTerminal.cpp b/lldb/source/Host/common/PseudoTerminal.cpp index 72549f1c88ab6..4668b09f4fdbd 100644 --- a/lldb/source/Host/common/PseudoTerminal.cpp +++ b/lldb/source/Host/common/PseudoTerminal.cpp @@ -8,9 +8,11 @@ #include "lldb/Host/PseudoTerminal.h" #include "lldb/Host/Config.h" - +#include "llvm/Support/Errc.h" #include "llvm/Support/Errno.h" - +#include <cassert> +#include <limits.h> +#include <mutex> #include #include #include @@ -128,15 +130,8 @@ bool PseudoTerminal::OpenSecondary(int oflag, char *error_str, CloseSecondaryFileDescriptor(); - // Open the primary side of a pseudo terminal - const char *secondary_name = GetSecondaryName(error_str, error_len); - - if (secondary_name == nullptr) - return false; - - m_secondary_fd = - llvm::sys::RetryAfterSignal(-1, ::open, secondary_name, oflag); - + std::string name = GetSecondaryName(); + m_secondary_fd = llvm::sys::RetryAfterSignal(-1, ::open, name.c_str(), oflag); if (m_secondary_fd < 0) { if (error_str) ErrnoToStr(error_str, error_len); @@ -146,32 +141,21 @@ return true; } -// Get the name of the secondary pseudo terminal. A primary pseudo terminal -// should already be valid prior to calling this function (see -// OpenFirstAvailablePrimary()). -// -// RETURNS: -// NULL if no valid primary pseudo terminal or if ptsname() fails. -// The name of the secondary pseudo terminal as a NULL terminated C string -// that comes from static memory, so a copy of the string should be -// made as subsequent calls can change this value. -const char *PseudoTerminal::GetSecondaryName(char *error_str, - size_t error_len) const { - if (error_str) - error_str[0] = '\0'; - - if (m_primary_fd < 0) { - if (error_str) - ::snprintf(error_str, error_len, "%s", - "primary file descriptor is invalid"); - return nullptr; - } - const char *secondary_name = ::ptsname(m_primary_fd); - - if (error_str && secondary_name == nullptr) - ErrnoToStr(error_str, error_len); - - return secondary_name; +std::string PseudoTerminal::GetSecondaryName() const { + assert(m_primary_fd >= 0); +#if HAVE_PTSNAME_R + char buf[PATH_MAX]; + buf[0] = '\0'; + int r = ptsname_r(m_primary_fd, buf, sizeof(buf)); + assert(r == 0); + return buf; +#else + static std::mutex mutex; + std::lock_guard<std::mutex> guard(mutex); + const char *r = ptsname(m_primary_fd); + assert(r != nullptr); + return r; +#endif } // Fork a child process and have its stdio routed to a pseudo terminal. 
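For context, a hedged illustration of how a caller uses the simplified API (not part of this patch; modeled on the ProcessGDBRemote hunk further down): the caller validates the primary end first, after which GetSecondaryName() cannot fail.

#include "lldb/Host/PseudoTerminal.h"
#include <fcntl.h> // O_RDWR, O_NOCTTY
#include <string>

void example() {
  lldb_private::PseudoTerminal pty;
  // Opening the primary first establishes GetSecondaryName's precondition.
  if (pty.OpenFirstAvailablePrimary(O_RDWR | O_NOCTTY, nullptr, 0)) {
    std::string secondary = pty.GetSecondaryName(); // cannot fail now
    // ... hand 'secondary' to the child process as its stdio path ...
  }
}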
diff --git a/lldb/source/Plugins/Platform/MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.mm b/lldb/source/Plugins/Platform/MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.mm index 8f36640a66a51..cfd44f9ae5ce4 100644 --- a/lldb/source/Plugins/Platform/MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.mm +++ b/lldb/source/Plugins/Platform/MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.mm @@ -407,25 +407,21 @@ static Status HandleFileAction(ProcessLaunchInfo &launch_info, const int master_fd = launch_info.GetPTY().GetPrimaryFileDescriptor(); if (master_fd != PseudoTerminal::invalid_fd) { // Check in case our file action open wants to open the secondary - const char *secondary_path = - launch_info.GetPTY().GetSecondaryName(NULL, 0); - if (secondary_path) { - FileSpec secondary_spec(secondary_path); - if (file_spec == secondary_spec) { - int secondary_fd = - launch_info.GetPTY().GetSecondaryFileDescriptor(); - if (secondary_fd == PseudoTerminal::invalid_fd) - secondary_fd = - launch_info.GetPTY().OpenSecondary(O_RDWR, nullptr, 0); - if (secondary_fd == PseudoTerminal::invalid_fd) { - error.SetErrorStringWithFormat( - "unable to open secondary pty '%s'", secondary_path); - return error; // Failure - } - [options setValue:[NSNumber numberWithInteger:secondary_fd] - forKey:key]; - return error; // Success + FileSpec secondary_spec(launch_info.GetPTY().GetSecondaryName()); + if (file_spec == secondary_spec) { + int secondary_fd = + launch_info.GetPTY().GetSecondaryFileDescriptor(); + if (secondary_fd == PseudoTerminal::invalid_fd) + secondary_fd = + launch_info.GetPTY().OpenSecondary(O_RDWR, nullptr, 0); + if (secondary_fd == PseudoTerminal::invalid_fd) { + error.SetErrorStringWithFormat( + "unable to open secondary pty '%s'", secondary_path); + return error; // Failure } + [options setValue:[NSNumber numberWithInteger:secondary_fd] + forKey:key]; + return error; // Success } } Status posix_error; diff --git a/lldb/source/Plugins/Process/FreeBSD/ProcessFreeBSD.cpp b/lldb/source/Plugins/Process/FreeBSD/ProcessFreeBSD.cpp index f1a424ccbca57..67a18bd8de13b 100644 --- a/lldb/source/Plugins/Process/FreeBSD/ProcessFreeBSD.cpp +++ b/lldb/source/Plugins/Process/FreeBSD/ProcessFreeBSD.cpp @@ -380,8 +380,7 @@ Status ProcessFreeBSD::DoLaunch(Module *module, FileSpec stdout_file_spec{}; FileSpec stderr_file_spec{}; - const FileSpec dbg_pts_file_spec{ - launch_info.GetPTY().GetSecondaryName(NULL, 0)}; + const FileSpec dbg_pts_file_spec{launch_info.GetPTY().GetSecondaryName()}; file_action = launch_info.GetFileActionForFD(STDIN_FILENO); stdin_file_spec = diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 8dea8b9809854..9adf25f00b3ec 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -818,7 +818,7 @@ Status ProcessGDBRemote::DoLaunch(lldb_private::Module *exe_module, // does a lot of output. 
if ((!stdin_file_spec || !stdout_file_spec || !stderr_file_spec) && pty.OpenFirstAvailablePrimary(O_RDWR | O_NOCTTY, nullptr, 0)) { - FileSpec secondary_name{pty.GetSecondaryName(nullptr, 0)}; + FileSpec secondary_name(pty.GetSecondaryName()); if (!stdin_file_spec) stdin_file_spec = secondary_name; From 62d4ee5b7a8b07001fce320b0d18d5a61593cc96 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 7 Oct 2020 09:27:19 -0400 Subject: [PATCH 222/321] [libc++] Use the existing CMake caches when running build bots --- libcxx/utils/ci/run-buildbot.sh | 35 +++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/libcxx/utils/ci/run-buildbot.sh b/libcxx/utils/ci/run-buildbot.sh index c62b1a935ad29..279291d508889 100755 --- a/libcxx/utils/ci/run-buildbot.sh +++ b/libcxx/utils/ci/run-buildbot.sh @@ -21,40 +21,44 @@ case "${BUILDER}" in generic-cxx03) export CC=clang export CXX=clang++ - args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param=std=c++03") + args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx03.cmake") ;; generic-cxx11) export CC=clang export CXX=clang++ - args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param=std=c++11") + args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx11.cmake") ;; generic-cxx14) export CC=clang export CXX=clang++ - args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param=std=c++14") + args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx14.cmake") ;; generic-cxx17) export CC=clang export CXX=clang++ - args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param=std=c++17") + args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx17.cmake") ;; generic-cxx2a) export CC=clang export CXX=clang++ - args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param=std=c++2a") + args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx2a.cmake") ;; generic-noexceptions) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") - args+=("-DLIBCXX_ENABLE_EXCEPTIONS=OFF") - args+=("-DLIBCXXABI_ENABLE_EXCEPTIONS=OFF") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-noexceptions.cmake") ;; generic-32bit) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") - args+=("-DLLVM_BUILD_32_BITS=ON") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-32bits.cmake") ;; generic-gcc) export CC=gcc @@ -66,27 +70,26 @@ generic-gcc) generic-asan) export CC=clang export CXX=clang++ - args+=("-DLLVM_USE_SANITIZER=Address") args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-asan.cmake") ;; generic-msan) export CC=clang export CXX=clang++ - args+=("-DLLVM_USE_SANITIZER=MemoryWithOrigins") args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-msan.cmake") ;; generic-tsan) export CC=clang export CXX=clang++ - args+=("-DLLVM_USE_SANITIZER=Thread") args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-tsan.cmake") ;; generic-ubsan) export CC=clang export CXX=clang++ - args+=("-DLLVM_USE_SANITIZER=Undefined") - args+=("-DLIBCXX_ABI_UNSTABLE=ON") args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-ubsan.cmake") ;; 
generic-with_llvm_unwinder) export CC=clang @@ -98,15 +101,13 @@ generic-singlethreaded) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") - args+=("-DLIBCXX_ENABLE_THREADS=OFF") - args+=("-DLIBCXXABI_ENABLE_THREADS=OFF") - args+=("-DLIBCXX_ENABLE_MONOTONIC_CLOCK=OFF") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-singlethreaded.cmake") ;; generic-nodebug) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") - args+=("-DLIBCXX_ENABLE_DEBUG_MODE=OFF") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-nodebug.cmake") ;; x86_64-apple-system) export CC=clang From 4a7e7620d6c64b9421f939f72e25c28563683eb7 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Wed, 7 Oct 2020 16:11:37 +0300 Subject: [PATCH 223/321] [AMDGPU][MC] Improved diagnostics for instructions with missing features Reviewers: rampitec Differential Revision: https://reviews.llvm.org/D88887 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 6 +- llvm/test/MC/AMDGPU/add-sub-no-carry.s | 67 +- llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s | 24 +- llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s | 24 +- llvm/test/MC/AMDGPU/gfx1030_err.s | 2 +- llvm/test/MC/AMDGPU/gfx10_asm_all.s | 6412 ++++++++--------- llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s | 24 +- llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s | 20 +- llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s | 26 +- llvm/test/MC/AMDGPU/lds_direct-err.s | 2 +- llvm/test/MC/AMDGPU/literals.s | 9 +- llvm/test/MC/AMDGPU/mtbuf.s | 2 +- llvm/test/MC/AMDGPU/mubuf.s | 58 +- llvm/test/MC/AMDGPU/smem.s | 9 +- llvm/test/MC/AMDGPU/smrd.s | 8 +- llvm/test/MC/AMDGPU/vop2.s | 2 +- llvm/test/MC/AMDGPU/vop3-gfx9.s | 76 +- llvm/test/MC/AMDGPU/vop3-literal.s | 20 +- llvm/test/MC/AMDGPU/vop_sdwa.s | 30 +- llvm/test/MC/AMDGPU/wave32.s | 62 +- 20 files changed, 3473 insertions(+), 3410 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 9e5c7b828c901..cdb686fe00439 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3882,8 +3882,10 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, switch (Result) { default: break; case Match_MissingFeature: - // FIXME: this case should be analyzed and error message corrected. - return Error(IDLoc, "instruction not supported on this GPU"); + // It has been verified that the specified instruction + // mnemonic is valid. A match was found but it requires + // features which are not supported on this GPU. 
+ return Error(IDLoc, "operands are not valid for this GPU or mode"); case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; diff --git a/llvm/test/MC/AMDGPU/add-sub-no-carry.s b/llvm/test/MC/AMDGPU/add-sub-no-carry.s index 884d1dd850722..2e3ac9d24376a 100644 --- a/llvm/test/MC/AMDGPU/add-sub-no-carry.s +++ b/llvm/test/MC/AMDGPU/add-sub-no-carry.s @@ -1,94 +1,115 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-VI,ERR-SICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICI,ERR-SICIVI --implicit-check-not=error: %s // FIXME: pre-gfx9 errors should be more useful v_add_u32 v1, v2, v3 // GFX9: v_add_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x68] -// ERR-SICIVI: error: instruction not supported on this GPU +// ERR-SICI: error: instruction not supported on this GPU +// ERR-VI: error: operands are not valid for this GPU or mode v_add_u32 v1, v2, s1 // GFX9: v_add_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x03,0x00,0x00] -// ERR-SICIVI: error: instruction not supported on this GPU +// ERR-SICI: error: instruction not supported on this GPU +// ERR-VI: error: operands are not valid for this GPU or mode v_add_u32 v1, s1, v2 // GFX9: v_add_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x68] -// ERR-SICIVI: error: instruction not supported on this GPU +// ERR-SICI: error: instruction not supported on this GPU +// ERR-VI: error: operands are not valid for this GPU or mode v_add_u32 v1, 4.0, v2 // GFX9: v_add_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x68] -// ERR-SICIVI: error: instruction not supported on this GPU +// ERR-SICI: error: instruction not supported on this GPU +// ERR-VI: error: operands are not valid for this GPU or mode v_add_u32 v1, v2, 4.0 // GFX9: v_add_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0xed,0x01,0x00] -// ERR-SICIVI: error: instruction not supported on this GPU +// ERR-SICI: error: instruction not supported on this GPU +// ERR-VI: error: operands are not valid for this GPU or mode v_add_u32_e32 v1, v2, v3 // GFX9: v_add_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x68] -// ERR-SICIVI: error: instruction not supported on this GPU +// ERR-SICI: error: instruction not supported on this GPU +// ERR-VI: error: operands are not valid for this GPU or mode v_add_u32_e32 v1, s1, v3 // GFX9: v_add_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x68] -// ERR-SICIVI: error: instruction not supported on this GPU +// ERR-SICI: error: instruction not supported on this GPU +// ERR-VI: error: operands are not valid for this GPU or mode v_sub_u32 v1, v2, v3 // GFX9: v_sub_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6a] -// ERR-SICIVI: error: instruction not supported on this GPU +// ERR-SICI: error: instruction not supported on this GPU +// ERR-VI: error: operands are not valid for this GPU or mode v_sub_u32 v1, v2, s1 // GFX9: v_sub_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x03,0x00,0x00] -// ERR-SICIVI: error: instruction not supported on this GPU +// ERR-SICI: error: instruction not supported on this GPU +// ERR-VI: error: operands are not valid for this GPU or mode v_sub_u32 
v1, s1, v2
// GFX9: v_sub_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x6a]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_sub_u32 v1, 4.0, v2
// GFX9: v_sub_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x6a]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_sub_u32 v1, v2, 4.0
// GFX9: v_sub_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0xed,0x01,0x00]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_sub_u32_e32 v1, v2, v3
// GFX9: v_sub_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6a]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_sub_u32_e32 v1, s1, v3
// GFX9: v_sub_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6a]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_subrev_u32 v1, v2, v3
// GFX9: v_subrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6c]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_subrev_u32 v1, v2, s1
// GFX9: v_subrev_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x03,0x00,0x00]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_subrev_u32 v1, s1, v2
// GFX9: v_subrev_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x6c]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_subrev_u32 v1, 4.0, v2
// GFX9: v_subrev_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x6c]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_subrev_u32 v1, v2, 4.0
// GFX9: v_subrev_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0xed,0x01,0x00]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_subrev_u32_e32 v1, v2, v3
// GFX9: v_subrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6c]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode

v_subrev_u32_e32 v1, s1, v3
// GFX9: v_subrev_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6c]
-// ERR-SICIVI: error: instruction not supported on this GPU
+// ERR-SICI: error: instruction not supported on this GPU
+// ERR-VI: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s b/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s
index 86efb1883339b..ab51b14e54543 100644
--- a/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s
+++ b/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s
@@ -13,30 +13,30 @@ buffer_load_format_d16_x v1, off, s[4:7], s1

buffer_load_format_d16_xy v1, off, s[4:7], s1
// PACKED: buffer_load_format_d16_xy v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode

buffer_load_format_d16_xyz v[1:2], off, s[4:7], s1
// PACKED: buffer_load_format_d16_xyz v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode

buffer_load_format_d16_xyzw v[1:2], off, s[4:7], s1
// PACKED: buffer_load_format_d16_xyzw v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode

buffer_store_format_d16_x v1, off, s[4:7], s1
// PACKED: buffer_store_format_d16_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]

buffer_store_format_d16_xy v1, off, s[4:7], s1
// PACKED: buffer_store_format_d16_xy v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode

buffer_store_format_d16_xyz v[1:2], off, s[4:7], s1
// PACKED: buffer_store_format_d16_xyz v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode

buffer_store_format_d16_xyzw v[1:2], off, s[4:7], s1
// PACKED: buffer_store_format_d16_xyzw v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x3c,0xe0,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode


//===----------------------------------------------------------------------===//
@@ -48,27 +48,27 @@ tbuffer_load_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1

tbuffer_load_format_d16_xy v1, off, s[4:7], dfmt:15, nfmt:2, s1
// PACKED: tbuffer_load_format_d16_xy v1, off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7c,0xe9,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode

tbuffer_load_format_d16_xyz v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
// PACKED: tbuffer_load_format_d16_xyz v[1:2], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x7d,0xe9,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode

tbuffer_load_format_d16_xyzw v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
// PACKED: tbuffer_load_format_d16_xyzw v[1:2], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7d,0xe9,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode

tbuffer_store_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1
// PACKED: tbuffer_store_format_d16_x v1, off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x7e,0xe9,0x00,0x01,0x01,0x01]

tbuffer_store_format_d16_xy v1, off, s[4:7], dfmt:15, nfmt:2, s1
// PACKED: tbuffer_store_format_d16_xy v1, off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7e,0xe9,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode

tbuffer_store_format_d16_xyz v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
// PACKED: tbuffer_store_format_d16_xyz v[1:2], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x7f,0xe9,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode

tbuffer_store_format_d16_xyzw v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
// PACKED: tbuffer_store_format_d16_xyzw v[1:2], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7f,0xe9,0x00,0x01,0x01,0x01]
-// UNPACKED-ERR: error: instruction not supported on this GPU
+// UNPACKED-ERR: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s b/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s
index f8e6407c0548e..78ca007171a57 100644
--- a/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s
+++ b/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s
@@ -12,30 +12,30 @@ buffer_load_format_d16_x v1, off, s[4:7], s1

buffer_load_format_d16_xy v[1:2], off, s[4:7], s1
// UNPACKED: buffer_load_format_d16_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode

buffer_load_format_d16_xyz v[1:3], off, s[4:7], s1
// UNPACKED: buffer_load_format_d16_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode

buffer_load_format_d16_xyzw v[1:4], off, s[4:7], s1
// UNPACKED: buffer_load_format_d16_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode

buffer_store_format_d16_x v1, off, s[4:7], s1
// UNPACKED: buffer_store_format_d16_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]

buffer_store_format_d16_xy v[1:2], off, s[4:7], s1
// UNPACKED: buffer_store_format_d16_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode

buffer_store_format_d16_xyz v[1:3], off, s[4:7], s1
// UNPACKED: buffer_store_format_d16_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode

buffer_store_format_d16_xyzw v[1:4], off, s[4:7], s1
// UNPACKED: buffer_store_format_d16_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x3c,0xe0,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode


//===----------------------------------------------------------------------===//
@@ -47,27 +47,27 @@ tbuffer_load_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1

tbuffer_load_format_d16_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
// UNPACKED: tbuffer_load_format_d16_xy v[1:2], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7c,0xe9,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode

tbuffer_load_format_d16_xyz v[1:3], off, s[4:7], dfmt:15, nfmt:2, s1
// UNPACKED: tbuffer_load_format_d16_xyz v[1:3], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x7d,0xe9,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode

tbuffer_load_format_d16_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1
// UNPACKED: tbuffer_load_format_d16_xyzw v[1:4], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7d,0xe9,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode

tbuffer_store_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1
// UNPACKED: tbuffer_store_format_d16_x v1, off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x7e,0xe9,0x00,0x01,0x01,0x01]

tbuffer_store_format_d16_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
// UNPACKED: tbuffer_store_format_d16_xy v[1:2], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7e,0xe9,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode

tbuffer_store_format_d16_xyz v[1:3], off, s[4:7], dfmt:15, nfmt:2, s1
// UNPACKED: tbuffer_store_format_d16_xyz v[1:3], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x7f,0xe9,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode

tbuffer_store_format_d16_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1
// UNPACKED: tbuffer_store_format_d16_xyzw v[1:4], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7f,0xe9,0x00,0x01,0x01,0x01]
-// PACKED-ERR: error: instruction not supported on this GPU
+// PACKED-ERR: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s
index b8a1cb3efec33..f7134a30c6f8e 100644
--- a/llvm/test/MC/AMDGPU/gfx1030_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1030_err.s
@@ -26,7 +26,7 @@ s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK)
// GFX10: error: specified hardware register is not supported on this GPU

v_mac_f32 v0, v1, v2
-// GFX10: error: instruction not supported on this GPU
+// GFX10: error: operands are not valid for this GPU or mode

v_mad_f32 v0, v1, v2, v3
// GFX10: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_all.s b/llvm/test/MC/AMDGPU/gfx10_asm_all.s
index 59c49220111ce..6f3d814153c5e 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_all.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_all.s
@@ -36894,35 +36894,35 @@ v_swaprel_b32 v5, v255

v_cndmask_b32_e32 v5, v1, v2, vcc
// W64: encoding: [0x01,0x05,0x0a,0x02]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v255, v1, v2, vcc
// W64: encoding: [0x01,0x05,0xfe,0x03]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, v255, v2, vcc
// W64: encoding: [0xff,0x05,0x0a,0x02]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, 0, v2, vcc
// W64: encoding: [0x80,0x04,0x0a,0x02]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, -1, v2, vcc
// W64: encoding: [0xc1,0x04,0x0a,0x02]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, 0.5, v2, vcc
// W64: encoding: [0xf0,0x04,0x0a,0x02]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, -4.0, v2, vcc
// W64: encoding: [0xf7,0x04,0x0a,0x02]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, v1, v255, vcc
// W64: encoding: [0x01,0xff,0x0b,0x02]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e64 v5, v1, v2, s[6:7]
// W64: encoding: [0x05,0x00,0x01,0xd5,0x01,0x05,0x1a,0x00]
@@ -36982,39 +36982,39 @@ v_cndmask_b32_e64 v5, v1, v2, s[100:101]

v_cndmask_b32_e64 v5, v1, v2, vcc
// W64: encoding: [0x05,0x00,0x01,0xd5,0x01,0x05,0xaa,0x01]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, v1, v2, vcc_lo
// W32: encoding: [0x01,0x05,0x0a,0x02]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v255, v1, v2, vcc_lo
// W32: encoding: [0x01,0x05,0xfe,0x03]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, v255, v2, vcc_lo
// W32: encoding: [0xff,0x05,0x0a,0x02]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, 0, v2, vcc_lo
// W32: encoding: [0x80,0x04,0x0a,0x02]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, -1, v2, vcc_lo
// W32: encoding: [0xc1,0x04,0x0a,0x02]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, 0.5, v2, vcc_lo
// W32: encoding: [0xf0,0x04,0x0a,0x02]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, -4.0, v2, vcc_lo
// W32: encoding: [0xf7,0x04,0x0a,0x02]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e32 v5, v1, v255, vcc_lo
// W32: encoding: [0x01,0xff,0x0b,0x02]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cndmask_b32_e64 v5, v1, v2, s6
// W32: encoding: [0x05,0x00,0x01,0xd5,0x01,0x05,0x1a,0x00]
@@ -37074,7 +37074,7 @@ v_cndmask_b32_e64 v5, v1, v2, s100

v_cndmask_b32_e64 v5, v1, v2, vcc_lo
// W32: encoding: [0x05,0x00,0x01,0xd5,0x01,0x05,0xaa,0x01]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_f32_e32 v5, v1, v2
// GFX10: encoding: [0x01,0x05,0x0a,0x06]
@@ -45559,35 +45559,35 @@ v_subrev_nc_u32_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_s

v_add_co_ci_u32_e32 v5, vcc, v1, v2, vcc
// W64: encoding: [0x01,0x05,0x0a,0x50]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v255, vcc, v1, v2, vcc
// W64: encoding: [0x01,0x05,0xfe,0x51]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc, v255, v2, vcc
// W64: encoding: [0xff,0x05,0x0a,0x50]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc, 0, v2, vcc
// W64: encoding: [0x80,0x04,0x0a,0x50]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc, -1, v2, vcc
// W64: encoding: [0xc1,0x04,0x0a,0x50]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc
// W64: encoding: [0xf0,0x04,0x0a,0x50]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc, -4.0, v2, vcc
// W64: encoding: [0xf7,0x04,0x0a,0x50]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc, v1, v255, vcc
// W64: encoding: [0x01,0xff,0x0b,0x50]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e64 v5, s[12:13], v1, v2, s[6:7]
// W64: encoding: [0x05,0x0c,0x28,0xd5,0x01,0x05,0x1a,0x00]
@@ -45663,35 +45663,35 @@ v_add_co_ci_u32_e64 v5, s[12:13], v1, v2, vcc

v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo
// W32: encoding: [0x01,0x05,0x0a,0x50]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v255, vcc_lo, v1, v2, vcc_lo
// W32: encoding: [0x01,0x05,0xfe,0x51]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo
// W32: encoding: [0xff,0x05,0x0a,0x50]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc_lo, 0, v2, vcc_lo
// W32: encoding: [0x80,0x04,0x0a,0x50]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo
// W32: encoding: [0xc1,0x04,0x0a,0x50]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo
// W32: encoding: [0xf0,0x04,0x0a,0x50]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc_lo, -4.0, v2, vcc_lo
// W32: encoding: [0xf7,0x04,0x0a,0x50]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e32 v5, vcc_lo, v1, v255, vcc_lo
// W32: encoding: [0x01,0xff,0x0b,0x50]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_e64 v5, s12, v1, v2, s6
// W32: encoding: [0x05,0x0c,0x28,0xd5,0x01,0x05,0x1a,0x00]
@@ -45767,283 +45767,283 @@ v_add_co_ci_u32_e64 v5, s12, v1, v2, vcc_lo

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v255, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0xfe,0x51,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v255, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0xff,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v255, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0xfe,0x0b,0x50,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x26,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x01,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x02,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x03,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x04,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x05,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x0e,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x16,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x16,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x00,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x01,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x02,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x03,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x04,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x05,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, sext(v1), v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x0e,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x01]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x02]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x03]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x04]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x05]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x0e]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v255, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0xfe,0x51,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v255, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0xff,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v255, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0xfe,0x0b,0x50,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x26,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x00,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x01,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x02,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x03,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x04,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x05,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x0e,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x16,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x16,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x00,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x01,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x02,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x03,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x04,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x05,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, sext(v1), v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x0e,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x01]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x02]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x03]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x04]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x05]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_add_co_ci_u32_sdwa v5, vcc_lo, v1, sext(v2), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x50,0x01,0x06,0x06,0x0e]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc, v1, v2, vcc
// W64: encoding: [0x01,0x05,0x0a,0x52]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v255, vcc, v1, v2, vcc
// W64: encoding: [0x01,0x05,0xfe,0x53]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc, v255, v2, vcc
// W64: encoding: [0xff,0x05,0x0a,0x52]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc, 0, v2, vcc
// W64: encoding: [0x80,0x04,0x0a,0x52]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc, -1, v2, vcc
// W64: encoding: [0xc1,0x04,0x0a,0x52]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc
// W64: encoding: [0xf0,0x04,0x0a,0x52]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc, -4.0, v2, vcc
// W64: encoding: [0xf7,0x04,0x0a,0x52]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc, v1, v255, vcc
// W64: encoding: [0x01,0xff,0x0b,0x52]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e64 v5, s[12:13], v1, v2, s[6:7]
// W64: encoding: [0x05,0x0c,0x29,0xd5,0x01,0x05,0x1a,0x00]
@@ -46119,35 +46119,35 @@ v_sub_co_ci_u32_e64 v5, s[12:13], v1, v2, vcc

v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo
// W32: encoding: [0x01,0x05,0x0a,0x52]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v255, vcc_lo, v1, v2, vcc_lo
// W32: encoding: [0x01,0x05,0xfe,0x53]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo
// W32: encoding: [0xff,0x05,0x0a,0x52]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc_lo, 0, v2, vcc_lo
// W32: encoding: [0x80,0x04,0x0a,0x52]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo
// W32: encoding: [0xc1,0x04,0x0a,0x52]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo
// W32: encoding: [0xf0,0x04,0x0a,0x52]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc_lo, -4.0, v2, vcc_lo
// W32: encoding: [0xf7,0x04,0x0a,0x52]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v255, vcc_lo
// W32: encoding: [0x01,0xff,0x0b,0x52]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_e64 v5, s12, v1, v2, s6
// W32: encoding: [0x05,0x0c,0x29,0xd5,0x01,0x05,0x1a,0x00]
@@ -46223,283 +46223,283 @@ v_sub_co_ci_u32_e64 v5, s12, v1, v2, vcc_lo

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v255, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0xfe,0x53,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v255, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0xff,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v255, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0xfe,0x0b,0x52,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x26,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x01,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x02,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x03,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x04,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x05,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x0e,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x16,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x16,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x00,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x01,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x02,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x03,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x04,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x05,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, sext(v1), v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x0e,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x01]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x02]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x03]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x04]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x05]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x0e]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v255, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0xfe,0x53,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v255, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0xff,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v255, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0xfe,0x0b,0x52,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x26,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x00,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x01,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x02,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x03,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x04,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x05,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x0e,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x16,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x16,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x00,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x01,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x02,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x03,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x04,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x05,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, sext(v1), v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x0e,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x06]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x01]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x02]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x03]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x04]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x05]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_sub_co_ci_u32_sdwa v5, vcc_lo, v1, sext(v2), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// W32: encoding: [0xf9,0x04,0x0a,0x52,0x01,0x06,0x06,0x0e]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_subrev_co_ci_u32_e32 v5, vcc, v1, v2, vcc
// W64: encoding: [0x01,0x05,0x0a,0x54]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_subrev_co_ci_u32_e32 v255, vcc, v1, v2, vcc
// W64: encoding: [0x01,0x05,0xfe,0x55]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+//
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc, v255, v2, vcc // W64: encoding: [0xff,0x05,0x0a,0x54] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc, 0, v2, vcc // W64: encoding: [0x80,0x04,0x0a,0x54] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc, -1, v2, vcc // W64: encoding: [0xc1,0x04,0x0a,0x54] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc // W64: encoding: [0xf0,0x04,0x0a,0x54] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc, -4.0, v2, vcc // W64: encoding: [0xf7,0x04,0x0a,0x54] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc, v1, v255, vcc // W64: encoding: [0x01,0xff,0x0b,0x54] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e64 v5, s[12:13], v1, v2, s[6:7] // W64: encoding: [0x05,0x0c,0x2a,0xd5,0x01,0x05,0x1a,0x00] @@ -46575,35 +46575,35 @@ v_subrev_co_ci_u32_e64 v5, s[12:13], v1, v2, vcc v_subrev_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo // W32: encoding: [0x01,0x05,0x0a,0x54] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v255, vcc_lo, v1, v2, vcc_lo // W32: encoding: [0x01,0x05,0xfe,0x55] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo // W32: encoding: [0xff,0x05,0x0a,0x54] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc_lo, 0, v2, vcc_lo // W32: encoding: [0x80,0x04,0x0a,0x54] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo // W32: encoding: [0xc1,0x04,0x0a,0x54] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo // W32: encoding: [0xf0,0x04,0x0a,0x54] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc_lo, -4.0, v2, vcc_lo // W32: encoding: [0xf7,0x04,0x0a,0x54] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported 
on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v5, vcc_lo, v1, v255, vcc_lo // W32: encoding: [0x01,0xff,0x0b,0x54] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e64 v5, s12, v1, v2, s6 // W32: encoding: [0x05,0x0c,0x2a,0xd5,0x01,0x05,0x1a,0x00] @@ -46679,251 +46679,251 @@ v_subrev_co_ci_u32_e64 v5, s12, v1, v2, vcc_lo v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v255, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xfe,0x55,0x01,0x06,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v255, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0xff,0x06,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v255, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0xfe,0x0b,0x54,0x01,0x06,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x26,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x01,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x02,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x03,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x04,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x05,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x0e,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x16,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x16,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x00,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x01,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x02,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x03,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x04,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x05,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, sext(v1), v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x0e,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x02] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x03] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x04] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x05] -// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x0e] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v255, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0xfe,0x55,0x01,0x06,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v255, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0xff,0x06,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v255, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0xfe,0x0b,0x54,0x01,0x06,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x26,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x00,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x01,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x02,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on 
this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x03,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x04,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x05,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x0e,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x16,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x16,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x00,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x01,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x02,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x03,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x04,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x05,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, sext(v1), v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x0e,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x06] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x02] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x03] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x04] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, v2, vcc_lo 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v5, vcc_lo, v1, sext(v2), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // W32: encoding: [0xf9,0x04,0x0a,0x54,0x01,0x06,0x06,0x0e] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_fmac_f32_e32 v5, v1, v2 // GFX10: encoding: [0x01,0x05,0x0a,0x56] @@ -62983,67 +62983,67 @@ v_pk_fmac_f16 v5, v1, v255 v_cmp_f_f32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x00,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x00,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x00,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x01,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00] @@ -63187,7 +63187,7 @@ v_cmp_f_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_f_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x00,0x7c,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x00,0x7c,0xff,0x86,0x06,0x06] @@ -63291,67 +63291,67 @@ v_cmp_f_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_f_f32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x00,0x7c] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x00,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x00,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x00,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x01,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00] @@ -63599,67 +63599,67 @@ v_cmp_f_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_lt_f32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, vcc_hi, v2 // W64: encoding: 
[0x6b,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x02,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x02,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x02,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x03,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] @@ -63803,7 +63803,7 @@ v_cmp_lt_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_lt_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x02,0x7c,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x02,0x7c,0xff,0x86,0x06,0x06] @@ -63907,67 +63907,67 @@ v_cmp_lt_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_lt_f32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode v_cmp_lt_f32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x02,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x02,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x02,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x03,0x7c] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] @@ -64215,67 +64215,67 @@ v_cmp_lt_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_eq_f32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x04,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x04,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x04,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x05,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] @@ -64419,7 +64419,7 @@ v_cmp_eq_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_eq_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x04,0x7c,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x04,0x7c,0xff,0x86,0x06,0x06] @@ -64523,67 +64523,67 @@ v_cmp_eq_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_eq_f32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 
vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x04,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x04,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x04,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x05,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] @@ -64831,67 +64831,67 @@ v_cmp_eq_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_le_f32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x06,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x06,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x06,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x06,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x06,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x06,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x06,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode

v_cmp_le_f32 vcc, exec_lo, v2
// W64: encoding: [0x7e,0x04,0x06,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc, exec_hi, v2
// W64: encoding: [0x7f,0x04,0x06,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc, 0, v2
// W64: encoding: [0x80,0x04,0x06,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc, -1, v2
// W64: encoding: [0xc1,0x04,0x06,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc, 0.5, v2
// W64: encoding: [0xf0,0x04,0x06,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc, -4.0, v2
// W64: encoding: [0xf7,0x04,0x06,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc, 0xaf123456, v2
// W64: encoding: [0xff,0x04,0x06,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc, 0x3f717273, v2
// W64: encoding: [0xff,0x04,0x06,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc, v1, v255
// W64: encoding: [0x01,0xff,0x07,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32_e64 s[10:11], v1, v2
// W64: encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]

@@ -65035,7 +65035,7 @@ v_cmp_le_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_le_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x06,0x7c,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x06,0x7c,0xff,0x86,0x06,0x06]

@@ -65139,67 +65139,67 @@ v_cmp_le_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_le_f32 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x06,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, 0xaf123456, v2
// W32: encoding: [0xff,0x04,0x06,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, 0x3f717273, v2
// W32: encoding: [0xff,0x04,0x06,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x07,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f32_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]

@@ -65447,67 +65447,67 @@ v_cmp_le_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_gt_f32 vcc, v1, v2
// W64: encoding: [0x01,0x05,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, v255, v2
// W64: encoding: [0xff,0x05,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, s1, v2
// W64: encoding: [0x01,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, s101, v2
// W64: encoding: [0x65,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, vcc_lo, v2
// W64: encoding: [0x6a,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, vcc_hi, v2
// W64: encoding: [0x6b,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, m0, v2
// W64: encoding: [0x7c,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, exec_lo, v2
// W64: encoding: [0x7e,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, exec_hi, v2
// W64: encoding: [0x7f,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, 0, v2
// W64: encoding: [0x80,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, -1, v2
// W64: encoding: [0xc1,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, 0.5, v2
// W64: encoding: [0xf0,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, -4.0, v2
// W64: encoding: [0xf7,0x04,0x08,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, 0xaf123456, v2
// W64: encoding: [0xff,0x04,0x08,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, 0x3f717273, v2
// W64: encoding: [0xff,0x04,0x08,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc, v1, v255
// W64: encoding: [0x01,0xff,0x09,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32_e64 s[10:11], v1, v2
// W64: encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]

@@ -65651,7 +65651,7 @@ v_cmp_gt_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_gt_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x08,0x7c,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x08,0x7c,0xff,0x86,0x06,0x06]

@@ -65755,67 +65755,67 @@ v_cmp_gt_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_gt_f32 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x08,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, 0xaf123456, v2
// W32: encoding: [0xff,0x04,0x08,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, 0x3f717273, v2
// W32: encoding: [0xff,0x04,0x08,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x09,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f32_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]

@@ -66063,67 +66063,67 @@ v_cmp_gt_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_lg_f32 vcc, v1, v2
// W64: encoding: [0x01,0x05,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, v255, v2
// W64: encoding: [0xff,0x05,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, s1, v2
// W64: encoding: [0x01,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, s101, v2
// W64: encoding: [0x65,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, vcc_lo, v2
// W64: encoding: [0x6a,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, vcc_hi, v2
// W64: encoding: [0x6b,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, m0, v2
// W64: encoding: [0x7c,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, exec_lo, v2
// W64: encoding: [0x7e,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, exec_hi, v2
// W64: encoding: [0x7f,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, 0, v2
// W64: encoding: [0x80,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, -1, v2
// W64: encoding: [0xc1,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, 0.5, v2
// W64: encoding: [0xf0,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, -4.0, v2
// W64: encoding: [0xf7,0x04,0x0a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, 0xaf123456, v2
// W64: encoding: [0xff,0x04,0x0a,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, 0x3f717273, v2
// W64: encoding: [0xff,0x04,0x0a,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc, v1, v255
// W64: encoding: [0x01,0xff,0x0b,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32_e64 s[10:11], v1, v2
// W64: encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]

@@ -66267,7 +66267,7 @@ v_cmp_lg_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_lg_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x7c,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0a,0x7c,0xff,0x86,0x06,0x06]

@@ -66371,67 +66371,67 @@ v_cmp_lg_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_lg_f32 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x0a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, 0xaf123456, v2
// W32: encoding: [0xff,0x04,0x0a,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, 0x3f717273, v2
// W32: encoding: [0xff,0x04,0x0a,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x0b,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f32_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]

@@ -66679,67 +66679,67 @@ v_cmp_lg_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_ge_f32 vcc, v1, v2
// W64: encoding: [0x01,0x05,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, v255, v2
// W64: encoding: [0xff,0x05,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, s1, v2
// W64: encoding: [0x01,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, s101, v2
// W64: encoding: [0x65,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, vcc_lo, v2
// W64: encoding: [0x6a,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, vcc_hi, v2
// W64: encoding: [0x6b,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, m0, v2
// W64: encoding: [0x7c,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, exec_lo, v2
// W64: encoding: [0x7e,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, exec_hi, v2
// W64: encoding: [0x7f,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, 0, v2
// W64: encoding: [0x80,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, -1, v2
// W64: encoding: [0xc1,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, 0.5, v2
// W64: encoding: [0xf0,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, -4.0, v2
// W64: encoding: [0xf7,0x04,0x0c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, 0xaf123456, v2
// W64: encoding: [0xff,0x04,0x0c,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, 0x3f717273, v2
// W64: encoding: [0xff,0x04,0x0c,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc, v1, v255
// W64: encoding: [0x01,0xff,0x0d,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32_e64 s[10:11], v1, v2
// W64: encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]

@@ -66883,7 +66883,7 @@ v_cmp_ge_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD
v_cmp_ge_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0c,0x7c,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0c,0x7c,0xff,0x86,0x06,0x06]

@@ -66987,67 +66987,67 @@ v_cmp_ge_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_ge_f32 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x0c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, 0xaf123456, v2
// W32: encoding: [0xff,0x04,0x0c,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, 0x3f717273, v2
// W32: encoding: [0xff,0x04,0x0c,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x0d,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f32_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]

@@ -67295,67 +67295,67 @@ v_cmp_ge_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_o_f32 vcc, v1, v2
// W64: encoding: [0x01,0x05,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, v255, v2
// W64: encoding: [0xff,0x05,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, s1, v2
// W64: encoding: [0x01,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, s101, v2
// W64: encoding: [0x65,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, vcc_lo, v2
// W64: encoding: [0x6a,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, vcc_hi, v2
// W64: encoding: [0x6b,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, m0, v2
// W64: encoding: [0x7c,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, exec_lo, v2
// W64: encoding: [0x7e,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, exec_hi, v2
// W64: encoding: [0x7f,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, 0, v2
// W64: encoding: [0x80,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, -1, v2
// W64: encoding: [0xc1,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, 0.5, v2
// W64: encoding: [0xf0,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, -4.0, v2
// W64: encoding: [0xf7,0x04,0x0e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, 0xaf123456, v2
// W64: encoding: [0xff,0x04,0x0e,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, 0x3f717273, v2
// W64: encoding: [0xff,0x04,0x0e,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc, v1, v255
// W64: encoding: [0x01,0xff,0x0f,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32_e64 s[10:11], v1, v2
// W64: encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]

@@ -67499,7 +67499,7 @@ v_cmp_o_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_o_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0e,0x7c,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x0e,0x7c,0xff,0x86,0x06,0x06]

@@ -67603,67 +67603,67 @@ v_cmp_o_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_o_f32 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x0e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, 0xaf123456, v2
// W32: encoding: [0xff,0x04,0x0e,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, 0x3f717273, v2
// W32: encoding: [0xff,0x04,0x0e,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x0f,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f32_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]

@@ -67911,67 +67911,67 @@ v_cmp_o_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_u_f32 vcc, v1, v2
// W64: encoding: [0x01,0x05,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, v255, v2
// W64: encoding: [0xff,0x05,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, s1, v2
// W64: encoding: [0x01,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, s101, v2
// W64: encoding: [0x65,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, vcc_lo, v2
// W64: encoding: [0x6a,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, vcc_hi, v2
// W64: encoding: [0x6b,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, m0, v2
// W64: encoding: [0x7c,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, exec_lo, v2
// W64: encoding: [0x7e,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, exec_hi, v2
// W64: encoding: [0x7f,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, 0, v2
// W64: encoding: [0x80,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, -1, v2
// W64: encoding: [0xc1,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, 0.5, v2
// W64: encoding: [0xf0,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, -4.0, v2
// W64: encoding: [0xf7,0x04,0x10,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, 0xaf123456, v2
// W64: encoding: [0xff,0x04,0x10,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, 0x3f717273, v2
// W64: encoding: [0xff,0x04,0x10,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc, v1, v255
// W64: encoding: [0x01,0xff,0x11,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32_e64 s[10:11], v1, v2
// W64: encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]

@@ -68115,7 +68115,7 @@ v_cmp_u_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_u_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x10,0x7c,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x10,0x7c,0xff,0x86,0x06,0x06]

@@ -68219,67 +68219,67 @@ v_cmp_u_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_u_f32 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x10,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, 0xaf123456, v2
// W32: encoding: [0xff,0x04,0x10,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, 0x3f717273, v2
// W32: encoding: [0xff,0x04,0x10,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x11,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f32_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]

@@ -68527,67 +68527,67 @@ v_cmp_u_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_nge_f32 vcc, v1, v2
// W64: encoding: [0x01,0x05,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, v255, v2
// W64: encoding: [0xff,0x05,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, s1, v2
// W64: encoding: [0x01,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, s101, v2
// W64: encoding: [0x65,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, vcc_lo, v2
// W64: encoding: [0x6a,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, vcc_hi, v2
// W64: encoding: [0x6b,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, m0, v2
// W64: encoding: [0x7c,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, exec_lo, v2
// W64: encoding: [0x7e,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, exec_hi, v2
// W64: encoding: [0x7f,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, 0, v2
// W64: encoding: [0x80,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, -1, v2
// W64: encoding: [0xc1,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, 0.5, v2
// W64: encoding: [0xf0,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, -4.0, v2
// W64: encoding: [0xf7,0x04,0x12,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, 0xaf123456, v2
// W64: encoding: [0xff,0x04,0x12,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, 0x3f717273, v2
// W64: encoding: [0xff,0x04,0x12,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc, v1, v255
// W64: encoding: [0x01,0xff,0x13,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32_e64 s[10:11], v1, v2
// W64: encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]

@@ -68731,7 +68731,7 @@ v_cmp_nge_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_nge_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x12,0x7c,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x12,0x7c,0xff,0x86,0x06,0x06]

@@ -68835,67 +68835,67 @@ v_cmp_nge_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_nge_f32 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x12,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, 0xaf123456, v2
// W32: encoding: [0xff,0x04,0x12,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, 0x3f717273, v2
// W32: encoding: [0xff,0x04,0x12,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x13,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f32_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]

@@ -69143,67 +69143,67 @@ v_cmp_nge_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_nlg_f32 vcc, v1, v2
// W64: encoding: [0x01,0x05,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, v255, v2
// W64: encoding: [0xff,0x05,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, s1, v2
// W64: encoding: [0x01,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, s101, v2
// W64: encoding: [0x65,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, vcc_lo, v2
// W64: encoding: [0x6a,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, vcc_hi, v2
// W64: encoding: [0x6b,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, m0, v2
// W64: encoding: [0x7c,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, exec_lo, v2
// W64: encoding: [0x7e,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, exec_hi, v2
// W64: encoding: [0x7f,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, 0, v2
// W64: encoding: [0x80,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, -1, v2
// W64: encoding: [0xc1,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, 0.5, v2
// W64: encoding: [0xf0,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, -4.0, v2
// W64: encoding: [0xf7,0x04,0x14,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, 0xaf123456, v2
// W64: encoding: [0xff,0x04,0x14,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, 0x3f717273, v2
// W64: encoding: [0xff,0x04,0x14,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc, v1, v255
// W64: encoding: [0x01,0xff,0x15,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32_e64 s[10:11], v1, v2
// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]

@@ -69347,7 +69347,7 @@ v_cmp_nlg_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_nlg_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x14,0x7c,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x14,0x7c,0xff,0x86,0x06,0x06]

@@ -69451,67 +69451,67 @@ v_cmp_nlg_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_nlg_f32 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x14,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, 0xaf123456, v2
// W32: encoding: [0xff,0x04,0x14,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, 0x3f717273, v2
// W32: encoding: [0xff,0x04,0x14,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x15,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f32_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]

@@ -69759,67 +69759,67 @@ v_cmp_nlg_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD

v_cmp_ngt_f32 vcc, v1, v2
// W64: encoding: [0x01,0x05,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, v255, v2
// W64: encoding: [0xff,0x05,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, s1, v2
// W64: encoding: [0x01,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, s101, v2
// W64: encoding: [0x65,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, vcc_lo, v2
// W64: encoding: [0x6a,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, vcc_hi, v2
// W64: encoding: [0x6b,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, m0, v2
// W64: encoding: [0x7c,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, exec_lo, v2
// W64: encoding: [0x7e,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, exec_hi, v2
// W64: encoding: [0x7f,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, 0, v2
// W64: encoding: [0x80,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, -1, v2
// W64: encoding: [0xc1,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, 0.5, v2
// W64: encoding: [0xf0,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, -4.0, v2
// W64: encoding: [0xf7,0x04,0x16,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f32 vcc, 0xaf123456, v2
// W64: encoding: [0xff,0x04,0x16,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x16,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x17,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] @@ -69963,7 +69963,7 @@ v_cmp_ngt_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_ngt_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x16,0x7c,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x16,0x7c,0xff,0x86,0x06,0x06] @@ -70067,67 +70067,67 @@ v_cmp_ngt_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_ngt_f32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x16,0x7c] -// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x16,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x16,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x16,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x17,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] @@ -70375,67 +70375,67 @@ v_cmp_ngt_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_nle_f32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, 
exec_lo, v2 // W64: encoding: [0x7e,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x18,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x18,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x18,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x19,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] @@ -70579,7 +70579,7 @@ v_cmp_nle_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_nle_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x18,0x7c,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x18,0x7c,0xff,0x86,0x06,0x06] @@ -70683,67 +70683,67 @@ v_cmp_nle_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_nle_f32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x18,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x18,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x18,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x19,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] @@ -70991,67 +70991,67 @@ v_cmp_nle_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_neq_f32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x1a,0x7c] -// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x1a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x1a,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x1a,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode v_cmp_neq_f32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x1b,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] @@ -71195,7 +71195,7 @@ v_cmp_neq_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_neq_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x1a,0x7c,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x1a,0x7c,0xff,0x86,0x06,0x06] @@ -71299,67 +71299,67 @@ v_cmp_neq_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_neq_f32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x1a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x1a,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x1a,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x1b,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] @@ -71607,67 +71607,67 @@ v_cmp_neq_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_nlt_f32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x1c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x1c,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x1c,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x1d,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] @@ -71811,7 +71811,7 @@ v_cmp_nlt_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_nlt_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x1c,0x7c,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x1c,0x7c,0xff,0x86,0x06,0x06] @@ -71915,67 +71915,67 @@ v_cmp_nlt_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_nlt_f32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, 
vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x1c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x1c,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x1c,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x1d,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] @@ -72223,67 +72223,67 @@ v_cmp_nlt_f32_sdwa s6, v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_tru_f32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x1e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x1e,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x1e,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x1f,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32_e64 s[10:11], v1, v2 // W64: encoding: 
[0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] @@ -72427,7 +72427,7 @@ v_cmp_tru_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_tru_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x1e,0x7c,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x1e,0x7c,0xff,0x86,0x06,0x06] @@ -72531,67 +72531,67 @@ v_cmp_tru_f32_sdwa s[6:7], v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_tru_f32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode v_cmp_tru_f32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x1e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x1e,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x1e,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x1f,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] @@ -76199,59 +76199,59 @@ v_cmpx_tru_f32_sdwa v1, |v2| src0_sel:DWORD src1_sel:DWORD v_cmp_f_f64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x40,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x40,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x40,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x41,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x00] @@ -76367,59 +76367,59 @@ v_cmp_f_f64_e64 s[10:11], v[1:2], v[2:3] clamp v_cmp_f_f64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, 0.5, v[2:3] // W32: encoding: 
[0xf0,0x04,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x40,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x40,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x40,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x41,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x00] @@ -76535,59 +76535,59 @@ v_cmp_f_f64_e64 s10, v[1:2], v[2:3] clamp v_cmp_lt_f64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x42,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x42,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x42,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x43,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] @@ -76703,59 +76703,59 @@ v_cmp_lt_f64_e64 s[10:11], v[1:2], v[2:3] clamp v_cmp_lt_f64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x42,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x42,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x42,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x42,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x42,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x42,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x42,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x42,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x42,0x7c] -// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x42,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x42,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x42,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x42,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x43,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] @@ -76871,59 +76871,59 @@ v_cmp_lt_f64_e64 s10, v[1:2], v[2:3] clamp v_cmp_eq_f64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode v_cmp_eq_f64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x44,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x44,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x44,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x45,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] @@ -77039,59 +77039,59 @@ v_cmp_eq_f64_e64 s[10:11], v[1:2], v[2:3] clamp v_cmp_eq_f64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x44,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x44,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x44,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x44,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x44,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x44,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x44,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x44,0x7c] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x44,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x44,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x44,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x44,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x44,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x45,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] @@ -77207,59 +77207,59 @@ v_cmp_eq_f64_e64 s10, v[1:2], v[2:3] clamp v_cmp_le_f64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode v_cmp_le_f64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x46,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x46,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x46,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x47,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] @@ -77375,59 +77375,59 @@ v_cmp_le_f64_e64 s[10:11], v[1:2], v[2:3] clamp v_cmp_le_f64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x46,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x46,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x46,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x47,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] @@ -77543,59 +77543,59 @@ v_cmp_le_f64_e64 s10, v[1:2], v[2:3] clamp v_cmp_gt_f64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
v_cmp_gt_f64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x48,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x48,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x48,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x49,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] @@ -77711,59 +77711,59 @@ v_cmp_gt_f64_e64 s[10:11], v[1:2], v[2:3] clamp v_cmp_gt_f64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not 
supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x48,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x48,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x48,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x49,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] @@ -77879,59 +77879,59 @@ v_cmp_gt_f64_e64 s10, v[1:2], v[2:3] clamp v_cmp_lg_f64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, vcc, 
v[2:3] // W64: encoding: [0x6a,0x04,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x4a,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x4a,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x4a,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x4b,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] @@ -78047,59 +78047,59 @@ v_cmp_lg_f64_e64 s[10:11], v[1:2], v[2:3] clamp v_cmp_lg_f64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x4a,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x4a,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x4a,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x4b,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] @@ -78215,59 +78215,59 @@ v_cmp_lg_f64_e64 s10, v[1:2], v[2:3] clamp v_cmp_ge_f64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, s[100:101], v[2:3] // W64: encoding: 
[0x64,0x04,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x4c,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x4c,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x4c,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x4d,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] @@ -78383,59 +78383,59 @@ v_cmp_ge_f64_e64 s[10:11], v[1:2], v[2:3] clamp v_cmp_ge_f64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x4c,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x4c,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x4c,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x4d,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] @@ -78551,59 +78551,59 @@ v_cmp_ge_f64_e64 s10, v[1:2], v[2:3] clamp v_cmp_o_f64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x4e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x4e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x4e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x4e,0x7c] -// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x4e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x4e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x4e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x4e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x4e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x4e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x4e,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x4e,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x4e,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x4f,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] @@ -78719,59 +78719,59 @@ v_cmp_o_f64_e64 s[10:11], v[1:2], v[2:3] clamp v_cmp_o_f64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
v_cmp_o_f64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x4e,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x4e,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x4e,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x4f,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] @@ -78887,59 +78887,59 @@ v_cmp_o_f64_e64 s10, v[1:2], v[2:3] clamp v_cmp_u_f64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported 
on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x50,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x50,0x7c,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x50,0x7c,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x51,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] @@ -79055,59 +79055,59 @@ v_cmp_u_f64_e64 s[10:11], v[1:2], v[2:3] clamp v_cmp_u_f64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, s[2:3], v[2:3] // W32: encoding: 
[0x02,0x04,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x50,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x50,0x7c,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x50,0x7c,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x51,0x7c] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] @@ -79223,59 +79223,59 @@ v_cmp_u_f64_e64 s10, v[1:2], v[2:3] clamp v_cmp_nge_f64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x52,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x52,0x7c] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, s[2:3], v[2:3]
 // W64: encoding: [0x02,0x04,0x52,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, s[4:5], v[2:3]
 // W64: encoding: [0x04,0x04,0x52,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, s[100:101], v[2:3]
 // W64: encoding: [0x64,0x04,0x52,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, vcc, v[2:3]
 // W64: encoding: [0x6a,0x04,0x52,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, exec, v[2:3]
 // W64: encoding: [0x7e,0x04,0x52,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, 0, v[2:3]
 // W64: encoding: [0x80,0x04,0x52,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, -1, v[2:3]
 // W64: encoding: [0xc1,0x04,0x52,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, 0.5, v[2:3]
 // W64: encoding: [0xf0,0x04,0x52,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, -4.0, v[2:3]
 // W64: encoding: [0xf7,0x04,0x52,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, 0xaf123456, v[2:3]
 // W64: encoding: [0xff,0x04,0x52,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, 0x3f717273, v[2:3]
 // W64: encoding: [0xff,0x04,0x52,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc, v[1:2], v[254:255]
 // W64: encoding: [0x01,0xfd,0x53,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64_e64 s[10:11], v[1:2], v[2:3]
 // W64: encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x00]
@@ -79391,59 +79391,59 @@ v_cmp_nge_f64_e64 s[10:11], v[1:2], v[2:3] clamp
 v_cmp_nge_f64 vcc_lo, v[1:2], v[2:3]
 // W32: encoding: [0x01,0x05,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, v[254:255], v[2:3]
 // W32: encoding: [0xfe,0x05,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, s[2:3], v[2:3]
 // W32: encoding: [0x02,0x04,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, s[4:5], v[2:3]
 // W32: encoding: [0x04,0x04,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, s[100:101], v[2:3]
 // W32: encoding: [0x64,0x04,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, vcc, v[2:3]
 // W32: encoding: [0x6a,0x04,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, exec, v[2:3]
 // W32: encoding: [0x7e,0x04,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, 0, v[2:3]
 // W32: encoding: [0x80,0x04,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, -1, v[2:3]
 // W32: encoding: [0xc1,0x04,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, 0.5, v[2:3]
 // W32: encoding: [0xf0,0x04,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, -4.0, v[2:3]
 // W32: encoding: [0xf7,0x04,0x52,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, 0xaf123456, v[2:3]
 // W32: encoding: [0xff,0x04,0x52,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, 0x3f717273, v[2:3]
 // W32: encoding: [0xff,0x04,0x52,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64 vcc_lo, v[1:2], v[254:255]
 // W32: encoding: [0x01,0xfd,0x53,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f64_e64 s10, v[1:2], v[2:3]
 // W32: encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x00]
@@ -79559,59 +79559,59 @@ v_cmp_nge_f64_e64 s10, v[1:2], v[2:3] clamp
 v_cmp_nlg_f64 vcc, v[1:2], v[2:3]
 // W64: encoding: [0x01,0x05,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, v[254:255], v[2:3]
 // W64: encoding: [0xfe,0x05,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, s[2:3], v[2:3]
 // W64: encoding: [0x02,0x04,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, s[4:5], v[2:3]
 // W64: encoding: [0x04,0x04,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, s[100:101], v[2:3]
 // W64: encoding: [0x64,0x04,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, vcc, v[2:3]
 // W64: encoding: [0x6a,0x04,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, exec, v[2:3]
 // W64: encoding: [0x7e,0x04,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, 0, v[2:3]
 // W64: encoding: [0x80,0x04,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, -1, v[2:3]
 // W64: encoding: [0xc1,0x04,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, 0.5, v[2:3]
 // W64: encoding: [0xf0,0x04,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, -4.0, v[2:3]
 // W64: encoding: [0xf7,0x04,0x54,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, 0xaf123456, v[2:3]
 // W64: encoding: [0xff,0x04,0x54,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, 0x3f717273, v[2:3]
 // W64: encoding: [0xff,0x04,0x54,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc, v[1:2], v[254:255]
 // W64: encoding: [0x01,0xfd,0x55,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64_e64 s[10:11], v[1:2], v[2:3]
 // W64: encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00]
@@ -79727,59 +79727,59 @@ v_cmp_nlg_f64_e64 s[10:11], v[1:2], v[2:3] clamp
 v_cmp_nlg_f64 vcc_lo, v[1:2], v[2:3]
 // W32: encoding: [0x01,0x05,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, v[254:255], v[2:3]
 // W32: encoding: [0xfe,0x05,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, s[2:3], v[2:3]
 // W32: encoding: [0x02,0x04,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, s[4:5], v[2:3]
 // W32: encoding: [0x04,0x04,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, s[100:101], v[2:3]
 // W32: encoding: [0x64,0x04,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, vcc, v[2:3]
 // W32: encoding: [0x6a,0x04,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, exec, v[2:3]
 // W32: encoding: [0x7e,0x04,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, 0, v[2:3]
 // W32: encoding: [0x80,0x04,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, -1, v[2:3]
 // W32: encoding: [0xc1,0x04,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, 0.5, v[2:3]
 // W32: encoding: [0xf0,0x04,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, -4.0, v[2:3]
 // W32: encoding: [0xf7,0x04,0x54,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, 0xaf123456, v[2:3]
 // W32: encoding: [0xff,0x04,0x54,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, 0x3f717273, v[2:3]
 // W32: encoding: [0xff,0x04,0x54,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64 vcc_lo, v[1:2], v[254:255]
 // W32: encoding: [0x01,0xfd,0x55,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f64_e64 s10, v[1:2], v[2:3]
 // W32: encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00]
@@ -79895,59 +79895,59 @@ v_cmp_nlg_f64_e64 s10, v[1:2], v[2:3] clamp
 v_cmp_ngt_f64 vcc, v[1:2], v[2:3]
 // W64: encoding: [0x01,0x05,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, v[254:255], v[2:3]
 // W64: encoding: [0xfe,0x05,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, s[2:3], v[2:3]
 // W64: encoding: [0x02,0x04,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, s[4:5], v[2:3]
 // W64: encoding: [0x04,0x04,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, s[100:101], v[2:3]
 // W64: encoding: [0x64,0x04,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, vcc, v[2:3]
 // W64: encoding: [0x6a,0x04,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, exec, v[2:3]
 // W64: encoding: [0x7e,0x04,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, 0, v[2:3]
 // W64: encoding: [0x80,0x04,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, -1, v[2:3]
 // W64: encoding: [0xc1,0x04,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, 0.5, v[2:3]
 // W64: encoding: [0xf0,0x04,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, -4.0, v[2:3]
 // W64: encoding: [0xf7,0x04,0x56,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, 0xaf123456, v[2:3]
 // W64: encoding: [0xff,0x04,0x56,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, 0x3f717273, v[2:3]
 // W64: encoding: [0xff,0x04,0x56,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc, v[1:2], v[254:255]
 // W64: encoding: [0x01,0xfd,0x57,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64_e64 s[10:11], v[1:2], v[2:3]
 // W64: encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00]
@@ -80063,59 +80063,59 @@ v_cmp_ngt_f64_e64 s[10:11], v[1:2], v[2:3] clamp
 v_cmp_ngt_f64 vcc_lo, v[1:2], v[2:3]
 // W32: encoding: [0x01,0x05,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, v[254:255], v[2:3]
 // W32: encoding: [0xfe,0x05,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, s[2:3], v[2:3]
 // W32: encoding: [0x02,0x04,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, s[4:5], v[2:3]
 // W32: encoding: [0x04,0x04,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, s[100:101], v[2:3]
 // W32: encoding: [0x64,0x04,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, vcc, v[2:3]
 // W32: encoding: [0x6a,0x04,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, exec, v[2:3]
 // W32: encoding: [0x7e,0x04,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, 0, v[2:3]
 // W32: encoding: [0x80,0x04,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, -1, v[2:3]
 // W32: encoding: [0xc1,0x04,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, 0.5, v[2:3]
 // W32: encoding: [0xf0,0x04,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, -4.0, v[2:3]
 // W32: encoding: [0xf7,0x04,0x56,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, 0xaf123456, v[2:3]
 // W32: encoding: [0xff,0x04,0x56,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, 0x3f717273, v[2:3]
 // W32: encoding: [0xff,0x04,0x56,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64 vcc_lo, v[1:2], v[254:255]
 // W32: encoding: [0x01,0xfd,0x57,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f64_e64 s10, v[1:2], v[2:3]
 // W32: encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00]
@@ -80231,59 +80231,59 @@ v_cmp_ngt_f64_e64 s10, v[1:2], v[2:3] clamp
 v_cmp_nle_f64 vcc, v[1:2], v[2:3]
 // W64: encoding: [0x01,0x05,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, v[254:255], v[2:3]
 // W64: encoding: [0xfe,0x05,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, s[2:3], v[2:3]
 // W64: encoding: [0x02,0x04,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, s[4:5], v[2:3]
 // W64: encoding: [0x04,0x04,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, s[100:101], v[2:3]
 // W64: encoding: [0x64,0x04,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, vcc, v[2:3]
 // W64: encoding: [0x6a,0x04,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, exec, v[2:3]
 // W64: encoding: [0x7e,0x04,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, 0, v[2:3]
 // W64: encoding: [0x80,0x04,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, -1, v[2:3]
 // W64: encoding: [0xc1,0x04,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, 0.5, v[2:3]
 // W64: encoding: [0xf0,0x04,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, -4.0, v[2:3]
 // W64: encoding: [0xf7,0x04,0x58,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, 0xaf123456, v[2:3]
 // W64: encoding: [0xff,0x04,0x58,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, 0x3f717273, v[2:3]
 // W64: encoding: [0xff,0x04,0x58,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc, v[1:2], v[254:255]
 // W64: encoding: [0x01,0xfd,0x59,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64_e64 s[10:11], v[1:2], v[2:3]
 // W64: encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00]
@@ -80399,59 +80399,59 @@ v_cmp_nle_f64_e64 s[10:11], v[1:2], v[2:3] clamp
 v_cmp_nle_f64 vcc_lo, v[1:2], v[2:3]
 // W32: encoding: [0x01,0x05,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, v[254:255], v[2:3]
 // W32: encoding: [0xfe,0x05,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, s[2:3], v[2:3]
 // W32: encoding: [0x02,0x04,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, s[4:5], v[2:3]
 // W32: encoding: [0x04,0x04,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, s[100:101], v[2:3]
 // W32: encoding: [0x64,0x04,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, vcc, v[2:3]
 // W32: encoding: [0x6a,0x04,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, exec, v[2:3]
 // W32: encoding: [0x7e,0x04,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, 0, v[2:3]
 // W32: encoding: [0x80,0x04,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, -1, v[2:3]
 // W32: encoding: [0xc1,0x04,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, 0.5, v[2:3]
 // W32: encoding: [0xf0,0x04,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, -4.0, v[2:3]
 // W32: encoding: [0xf7,0x04,0x58,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, 0xaf123456, v[2:3]
 // W32: encoding: [0xff,0x04,0x58,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, 0x3f717273, v[2:3]
 // W32: encoding: [0xff,0x04,0x58,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64 vcc_lo, v[1:2], v[254:255]
 // W32: encoding: [0x01,0xfd,0x59,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f64_e64 s10, v[1:2], v[2:3]
 // W32: encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00]
@@ -80567,59 +80567,59 @@ v_cmp_nle_f64_e64 s10, v[1:2], v[2:3] clamp
 v_cmp_neq_f64 vcc, v[1:2], v[2:3]
 // W64: encoding: [0x01,0x05,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, v[254:255], v[2:3]
 // W64: encoding: [0xfe,0x05,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, s[2:3], v[2:3]
 // W64: encoding: [0x02,0x04,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, s[4:5], v[2:3]
 // W64: encoding: [0x04,0x04,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, s[100:101], v[2:3]
 // W64: encoding: [0x64,0x04,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, vcc, v[2:3]
 // W64: encoding: [0x6a,0x04,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, exec, v[2:3]
 // W64: encoding: [0x7e,0x04,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, 0, v[2:3]
 // W64: encoding: [0x80,0x04,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, -1, v[2:3]
 // W64: encoding: [0xc1,0x04,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, 0.5, v[2:3]
 // W64: encoding: [0xf0,0x04,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, -4.0, v[2:3]
 // W64: encoding: [0xf7,0x04,0x5a,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, 0xaf123456, v[2:3]
 // W64: encoding: [0xff,0x04,0x5a,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, 0x3f717273, v[2:3]
 // W64: encoding: [0xff,0x04,0x5a,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc, v[1:2], v[254:255]
 // W64: encoding: [0x01,0xfd,0x5b,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64_e64 s[10:11], v[1:2], v[2:3]
 // W64: encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00]
@@ -80735,59 +80735,59 @@ v_cmp_neq_f64_e64 s[10:11], v[1:2], v[2:3] clamp
 v_cmp_neq_f64 vcc_lo, v[1:2], v[2:3]
 // W32: encoding: [0x01,0x05,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, v[254:255], v[2:3]
 // W32: encoding: [0xfe,0x05,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, s[2:3], v[2:3]
 // W32: encoding: [0x02,0x04,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, s[4:5], v[2:3]
 // W32: encoding: [0x04,0x04,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, s[100:101], v[2:3]
 // W32: encoding: [0x64,0x04,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, vcc, v[2:3]
 // W32: encoding: [0x6a,0x04,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, exec, v[2:3]
 // W32: encoding: [0x7e,0x04,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, 0, v[2:3]
 // W32: encoding: [0x80,0x04,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, -1, v[2:3]
 // W32: encoding: [0xc1,0x04,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, 0.5, v[2:3]
 // W32: encoding: [0xf0,0x04,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, -4.0, v[2:3]
 // W32: encoding: [0xf7,0x04,0x5a,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, 0xaf123456, v[2:3]
 // W32: encoding: [0xff,0x04,0x5a,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, 0x3f717273, v[2:3]
 // W32: encoding: [0xff,0x04,0x5a,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64 vcc_lo, v[1:2], v[254:255]
 // W32: encoding: [0x01,0xfd,0x5b,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f64_e64 s10, v[1:2], v[2:3]
 // W32: encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00]
@@ -80903,59 +80903,59 @@ v_cmp_neq_f64_e64 s10, v[1:2], v[2:3] clamp
 v_cmp_nlt_f64 vcc, v[1:2], v[2:3]
 // W64: encoding: [0x01,0x05,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, v[254:255], v[2:3]
 // W64: encoding: [0xfe,0x05,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, s[2:3], v[2:3]
 // W64: encoding: [0x02,0x04,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, s[4:5], v[2:3]
 // W64: encoding: [0x04,0x04,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, s[100:101], v[2:3]
 // W64: encoding: [0x64,0x04,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, vcc, v[2:3]
 // W64: encoding: [0x6a,0x04,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, exec, v[2:3]
 // W64: encoding: [0x7e,0x04,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, 0, v[2:3]
 // W64: encoding: [0x80,0x04,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, -1, v[2:3]
 // W64: encoding: [0xc1,0x04,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, 0.5, v[2:3]
 // W64: encoding: [0xf0,0x04,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, -4.0, v[2:3]
 // W64: encoding: [0xf7,0x04,0x5c,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, 0xaf123456, v[2:3]
 // W64: encoding: [0xff,0x04,0x5c,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, 0x3f717273, v[2:3]
 // W64: encoding: [0xff,0x04,0x5c,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc, v[1:2], v[254:255]
 // W64: encoding: [0x01,0xfd,0x5d,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64_e64 s[10:11], v[1:2], v[2:3]
 // W64: encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00]
@@ -81071,59 +81071,59 @@ v_cmp_nlt_f64_e64 s[10:11], v[1:2], v[2:3] clamp
 v_cmp_nlt_f64 vcc_lo, v[1:2], v[2:3]
 // W32: encoding: [0x01,0x05,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, v[254:255], v[2:3]
 // W32: encoding: [0xfe,0x05,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, s[2:3], v[2:3]
 // W32: encoding: [0x02,0x04,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, s[4:5], v[2:3]
 // W32: encoding: [0x04,0x04,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, s[100:101], v[2:3]
 // W32: encoding: [0x64,0x04,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, vcc, v[2:3]
 // W32: encoding: [0x6a,0x04,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, exec, v[2:3]
 // W32: encoding: [0x7e,0x04,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, 0, v[2:3]
 // W32: encoding: [0x80,0x04,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, -1, v[2:3]
 // W32: encoding: [0xc1,0x04,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, 0.5, v[2:3]
 // W32: encoding: [0xf0,0x04,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, -4.0, v[2:3]
 // W32: encoding: [0xf7,0x04,0x5c,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, 0xaf123456, v[2:3]
 // W32: encoding: [0xff,0x04,0x5c,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, 0x3f717273, v[2:3]
 // W32: encoding: [0xff,0x04,0x5c,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64 vcc_lo, v[1:2], v[254:255]
 // W32: encoding: [0x01,0xfd,0x5d,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f64_e64 s10, v[1:2], v[2:3]
 // W32: encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00]
@@ -81239,59 +81239,59 @@ v_cmp_nlt_f64_e64 s10, v[1:2], v[2:3] clamp
 v_cmp_tru_f64 vcc, v[1:2], v[2:3]
 // W64: encoding: [0x01,0x05,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, v[254:255], v[2:3]
 // W64: encoding: [0xfe,0x05,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, s[2:3], v[2:3]
 // W64: encoding: [0x02,0x04,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, s[4:5], v[2:3]
 // W64: encoding: [0x04,0x04,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, s[100:101], v[2:3]
 // W64: encoding: [0x64,0x04,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, vcc, v[2:3]
 // W64: encoding: [0x6a,0x04,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, exec, v[2:3]
 // W64: encoding: [0x7e,0x04,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, 0, v[2:3]
 // W64: encoding: [0x80,0x04,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, -1, v[2:3]
 // W64: encoding: [0xc1,0x04,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, 0.5, v[2:3]
 // W64: encoding: [0xf0,0x04,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, -4.0, v[2:3]
 // W64: encoding: [0xf7,0x04,0x5e,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, 0xaf123456, v[2:3]
 // W64: encoding: [0xff,0x04,0x5e,0x7c,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, 0x3f717273, v[2:3]
 // W64: encoding: [0xff,0x04,0x5e,0x7c,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc, v[1:2], v[254:255]
 // W64: encoding: [0x01,0xfd,0x5f,0x7c]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64_e64 s[10:11], v[1:2], v[2:3]
 // W64: encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00]
@@ -81407,59 +81407,59 @@ v_cmp_tru_f64_e64 s[10:11], v[1:2], v[2:3] clamp
 v_cmp_tru_f64 vcc_lo, v[1:2], v[2:3]
 // W32: encoding: [0x01,0x05,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, v[254:255], v[2:3]
 // W32: encoding: [0xfe,0x05,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, s[2:3], v[2:3]
 // W32: encoding: [0x02,0x04,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, s[4:5], v[2:3]
 // W32: encoding: [0x04,0x04,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, s[100:101], v[2:3]
 // W32: encoding: [0x64,0x04,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, vcc, v[2:3]
 // W32: encoding: [0x6a,0x04,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, exec, v[2:3]
 // W32: encoding: [0x7e,0x04,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, 0, v[2:3]
 // W32: encoding: [0x80,0x04,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, -1, v[2:3]
 // W32: encoding: [0xc1,0x04,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, 0.5, v[2:3]
 // W32: encoding: [0xf0,0x04,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, -4.0, v[2:3]
 // W32: encoding: [0xf7,0x04,0x5e,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, 0xaf123456, v[2:3]
 // W32: encoding: [0xff,0x04,0x5e,0x7c,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, 0x3f717273, v[2:3]
 // W32: encoding: [0xff,0x04,0x5e,0x7c,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64 vcc_lo, v[1:2], v[254:255]
 // W32: encoding: [0x01,0xfd,0x5f,0x7c]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f64_e64 s10, v[1:2], v[2:3]
 // W32: encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00]
@@ -83396,67 +83396,67 @@ v_cmpx_tru_f64_e64 -v[1:2], -v[2:3]
 v_cmp_f_i32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x00,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x00,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x00,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x01,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_f_i32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x80,0xd4,0x01,0x05,0x02,0x00]
@@ -83572,67 +83572,67 @@ v_cmp_f_i32_e64 s[10:11], v1, -4.0
 v_cmp_lt_i32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x02,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x02,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x02,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x03,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x81,0xd4,0x01,0x05,0x02,0x00]
@@ -83748,67 +83748,67 @@ v_cmp_lt_i32_e64 s[10:11], v1, -4.0
 v_cmp_eq_i32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x04,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x04,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x04,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x05,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x82,0xd4,0x01,0x05,0x02,0x00]
@@ -83924,67 +83924,67 @@ v_cmp_eq_i32_e64 s[10:11], v1, -4.0
 v_cmp_le_i32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x06,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x06,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x06,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x07,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x83,0xd4,0x01,0x05,0x02,0x00]
@@ -84100,67 +84100,67 @@ v_cmp_le_i32_e64 s[10:11], v1, -4.0
 v_cmp_gt_i32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x08,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
v_cmp_gt_i32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x08,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x08,0x7d,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x08,0x7d,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x09,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x84,0xd4,0x01,0x05,0x02,0x00] @@ -84276,67 +84276,67 @@ v_cmp_gt_i32_e64 s[10:11], v1, -4.0 v_cmp_ne_i32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x0a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x0a,0x7d,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x0a,0x7d,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x0b,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x85,0xd4,0x01,0x05,0x02,0x00] @@ -84452,67 +84452,67 @@ v_cmp_ne_i32_e64 s[10:11], v1, -4.0 v_cmp_ge_i32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x0c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x0c,0x7d,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x0c,0x7d,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x0d,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x86,0xd4,0x01,0x05,0x02,0x00] @@ -84628,67 +84628,67 @@ v_cmp_ge_i32_e64 s[10:11], v1, -4.0 v_cmp_t_i32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode v_cmp_t_i32 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x0e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, 0xaf123456, v2 // W64: encoding: [0xff,0x04,0x0e,0x7d,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, 0x3f717273, v2 // W64: encoding: [0xff,0x04,0x0e,0x7d,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x0f,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0x87,0xd4,0x01,0x05,0x02,0x00] @@ -84816,7 +84816,7 @@ v_cmp_f_i32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_f_i32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x00,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x00,0x7d,0xff,0x86,0x06,0x06] @@ -84924,7 +84924,7 @@ v_cmp_lt_i32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_lt_i32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x02,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x02,0x7d,0xff,0x86,0x06,0x06] @@ -85032,7 +85032,7 @@ v_cmp_eq_i32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_eq_i32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x04,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x04,0x7d,0xff,0x86,0x06,0x06] @@ -85140,7 +85140,7 @@ v_cmp_le_i32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_le_i32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x06,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32_sdwa s[6:7], v255, v2 
src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x06,0x7d,0xff,0x86,0x06,0x06] @@ -85248,7 +85248,7 @@ v_cmp_gt_i32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_gt_i32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x08,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x08,0x7d,0xff,0x86,0x06,0x06] @@ -85356,7 +85356,7 @@ v_cmp_ne_i32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_ne_i32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0a,0x7d,0xff,0x86,0x06,0x06] @@ -85464,7 +85464,7 @@ v_cmp_ge_i32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_ge_i32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0c,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0c,0x7d,0xff,0x86,0x06,0x06] @@ -85572,7 +85572,7 @@ v_cmp_t_i32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_t_i32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0e,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x0e,0x7d,0xff,0x86,0x06,0x06] @@ -85668,67 +85668,67 @@ v_cmp_t_i32_sdwa s[6:7], v1, sext(v2) src0_sel:DWORD src1_sel:DWORD v_cmp_f_i32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not 
supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x00,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x00,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x00,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x01,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_i32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x80,0xd4,0x01,0x05,0x02,0x00] @@ -85844,67 +85844,67 @@ v_cmp_f_i32_e64 s10, v1, -4.0 v_cmp_lt_i32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x02,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x02,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x02,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x03,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_i32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] @@ -86020,67 +86020,67 @@ v_cmp_lt_i32_e64 s10, v1, -4.0 v_cmp_eq_i32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x04,0x7d] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x04,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x04,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x04,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x05,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode v_cmp_eq_i32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x82,0xd4,0x01,0x05,0x02,0x00] @@ -86196,67 +86196,67 @@ v_cmp_eq_i32_e64 s10, v1, -4.0 v_cmp_le_i32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x06,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x06,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x06,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x07,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x83,0xd4,0x01,0x05,0x02,0x00] @@ -86372,67 +86372,67 @@ v_cmp_le_i32_e64 s10, v1, -4.0 v_cmp_gt_i32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x08,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x08,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x08,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x09,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x84,0xd4,0x01,0x05,0x02,0x00] @@ -86548,67 +86548,67 @@ v_cmp_gt_i32_e64 s10, v1, -4.0 v_cmp_ne_i32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not 
supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x0a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x0a,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x0a,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x0b,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x85,0xd4,0x01,0x05,0x02,0x00] @@ -86724,67 +86724,67 @@ v_cmp_ne_i32_e64 s10, v1, -4.0 v_cmp_ge_i32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x0c,0x7d] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x0c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x0c,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x0c,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x0d,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x86,0xd4,0x01,0x05,0x02,0x00] @@ -86900,67 +86900,67 @@ v_cmp_ge_i32_e64 s10, v1, -4.0 v_cmp_t_i32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, vcc_hi, v2 // W32: encoding: 
[0x6b,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x0e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x0e,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x0e,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x0f,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0x87,0xd4,0x01,0x05,0x02,0x00] @@ -87940,67 +87940,67 @@ v_cmp_t_i32_sdwa s6, v1, sext(v2) src0_sel:DWORD src1_sel:DWORD v_cmp_class_f32 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x10,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f32 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x10,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f32 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x10,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
 v_cmp_class_f32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x10,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x10,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x10,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x10,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x10,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x10,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x10,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x10,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x10,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x10,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x10,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x10,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x11,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x88,0xd4,0x01,0x05,0x02,0x00]
@@ -88132,7 +88132,7 @@ v_cmp_class_f32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

 v_cmp_class_f32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x10,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x10,0x7d,0xff,0x86,0x06,0x06]
@@ -88232,67 +88232,67 @@ v_cmp_class_f32_sdwa s[6:7], v1, sext(v2) src0_sel:DWORD src1_sel:DWORD

 v_cmp_class_f32 vcc_lo, v1, v2
 // W32: encoding: [0x01,0x05,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, v255, v2
 // W32: encoding: [0xff,0x05,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, s1, v2
 // W32: encoding: [0x01,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, s101, v2
 // W32: encoding: [0x65,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, vcc_lo, v2
 // W32: encoding: [0x6a,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, vcc_hi, v2
 // W32: encoding: [0x6b,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, m0, v2
 // W32: encoding: [0x7c,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, exec_lo, v2
 // W32: encoding: [0x7e,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, exec_hi, v2
 // W32: encoding: [0x7f,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, 0, v2
 // W32: encoding: [0x80,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, -1, v2
 // W32: encoding: [0xc1,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, 0.5, v2
 // W32: encoding: [0xf0,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, -4.0, v2
 // W32: encoding: [0xf7,0x04,0x10,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, 0xaf123456, v2
 // W32: encoding: [0xff,0x04,0x10,0x7d,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, 0x3f717273, v2
 // W32: encoding: [0xff,0x04,0x10,0x7d,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32 vcc_lo, v1, v255
 // W32: encoding: [0x01,0xff,0x11,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f32_e64 s10, v1, v2
 // W32: encoding: [0x0a,0x00,0x88,0xd4,0x01,0x05,0x02,0x00]
@@ -88524,67 +88524,67 @@ v_cmp_class_f32_sdwa s6, v1, sext(v2) src0_sel:DWORD src1_sel:DWORD

 v_cmp_lt_i16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x12,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x12,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x12,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x12,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x12,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x13,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x89,0xd4,0x01,0x05,0x02,0x00]
@@ -88700,67 +88700,67 @@ v_cmp_lt_i16_e64 s[10:11], v1, -4.0

 v_cmp_eq_i16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x14,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x14,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x14,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x14,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x14,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x15,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x8a,0xd4,0x01,0x05,0x02,0x00]
@@ -88876,67 +88876,67 @@ v_cmp_eq_i16_e64 s[10:11], v1, -4.0

 v_cmp_le_i16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x16,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x16,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x16,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x16,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x16,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x17,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x8b,0xd4,0x01,0x05,0x02,0x00]
@@ -89052,67 +89052,67 @@ v_cmp_le_i16_e64 s[10:11], v1, -4.0

 v_cmp_gt_i16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x18,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x18,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x18,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x18,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x18,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x19,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x8c,0xd4,0x01,0x05,0x02,0x00]
@@ -89228,67 +89228,67 @@ v_cmp_gt_i16_e64 s[10:11], v1, -4.0

 v_cmp_ne_i16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x1a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x1a,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x1a,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x1a,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x1a,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x1b,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x8d,0xd4,0x01,0x05,0x02,0x00]
@@ -89404,67 +89404,67 @@ v_cmp_ne_i16_e64 s[10:11], v1, -4.0

 v_cmp_ge_i16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x1c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x1c,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x1c,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x1c,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x1c,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x1d,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x8e,0xd4,0x01,0x05,0x02,0x00]
@@ -89592,7 +89592,7 @@ v_cmp_lt_i16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

 v_cmp_lt_i16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x12,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x12,0x7d,0xff,0x86,0x06,0x06]
@@ -89700,7 +89700,7 @@ v_cmp_eq_i16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

 v_cmp_eq_i16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x14,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x14,0x7d,0xff,0x86,0x06,0x06]
@@ -89808,7 +89808,7 @@ v_cmp_le_i16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

 v_cmp_le_i16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x16,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x16,0x7d,0xff,0x86,0x06,0x06]
@@ -89916,7 +89916,7 @@ v_cmp_gt_i16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

 v_cmp_gt_i16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x18,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x18,0x7d,0xff,0x86,0x06,0x06]
@@ -90024,7 +90024,7 @@ v_cmp_ne_i16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

 v_cmp_ne_i16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x1a,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x1a,0x7d,0xff,0x86,0x06,0x06]
@@ -90132,7 +90132,7 @@ v_cmp_ge_i16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

 v_cmp_ge_i16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x1c,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x1c,0x7d,0xff,0x86,0x06,0x06]
@@ -90228,67 +90228,67 @@ v_cmp_ge_i16_sdwa s[6:7], v1, sext(v2) src0_sel:DWORD src1_sel:DWORD

 v_cmp_lt_i16 vcc_lo, v1, v2
 // W32: encoding: [0x01,0x05,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, v255, v2
 // W32: encoding: [0xff,0x05,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, s1, v2
 // W32: encoding: [0x01,0x04,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, s101, v2
 // W32: encoding: [0x65,0x04,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, vcc_lo, v2
 // W32: encoding: [0x6a,0x04,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, vcc_hi, v2
 // W32: encoding: [0x6b,0x04,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, m0, v2
 // W32: encoding: [0x7c,0x04,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, exec_lo, v2
 // W32: encoding: [0x7e,0x04,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, exec_hi, v2
 // W32: encoding: [0x7f,0x04,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, 0, v2
 // W32: encoding: [0x80,0x04,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, -1, v2
 // W32: encoding: [0xc1,0x04,0x12,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, 0.5, v2
 // W32: encoding: [0xff,0x04,0x12,0x7d,0x00,0x38,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, -4.0, v2
 // W32: encoding: [0xff,0x04,0x12,0x7d,0x00,0xc4,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, 0xfe0b, v2
 // W32: encoding: [0xff,0x04,0x12,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, 0x3456, v2
 // W32: encoding: [0xff,0x04,0x12,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16 vcc_lo, v1, v255
 // W32: encoding: [0x01,0xff,0x13,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i16_e64 s10, v1, v2
 // W32: encoding: [0x0a,0x00,0x89,0xd4,0x01,0x05,0x02,0x00]
@@ -90404,67 +90404,67 @@ v_cmp_lt_i16_e64 s10, v1, -4.0

 v_cmp_eq_i16 vcc_lo, v1, v2
 // W32: encoding: [0x01,0x05,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, v255, v2
 // W32: encoding: [0xff,0x05,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, s1, v2
 // W32: encoding: [0x01,0x04,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, s101, v2
 // W32: encoding: [0x65,0x04,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, vcc_lo, v2
 // W32: encoding: [0x6a,0x04,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, vcc_hi, v2
 // W32: encoding: [0x6b,0x04,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, m0, v2
 // W32: encoding: [0x7c,0x04,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, exec_lo, v2
 // W32: encoding: [0x7e,0x04,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, exec_hi, v2
 // W32: encoding: [0x7f,0x04,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, 0, v2
 // W32: encoding: [0x80,0x04,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, -1, v2
 // W32: encoding: [0xc1,0x04,0x14,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, 0.5, v2
 // W32: encoding: [0xff,0x04,0x14,0x7d,0x00,0x38,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, -4.0, v2
 // W32: encoding: [0xff,0x04,0x14,0x7d,0x00,0xc4,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, 0xfe0b, v2
 // W32: encoding: [0xff,0x04,0x14,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, 0x3456, v2
 // W32: encoding: [0xff,0x04,0x14,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16 vcc_lo, v1, v255
 // W32: encoding: [0x01,0xff,0x15,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i16_e64 s10, v1, v2
 // W32: encoding: [0x0a,0x00,0x8a,0xd4,0x01,0x05,0x02,0x00]
@@ -90580,67 +90580,67 @@ v_cmp_eq_i16_e64 s10, v1, -4.0

 v_cmp_le_i16 vcc_lo, v1, v2
 // W32: encoding: [0x01,0x05,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, v255, v2
 // W32: encoding: [0xff,0x05,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, s1, v2
 // W32: encoding: [0x01,0x04,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, s101, v2
 // W32: encoding: [0x65,0x04,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, vcc_lo, v2
 // W32: encoding: [0x6a,0x04,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, vcc_hi, v2
 // W32: encoding: [0x6b,0x04,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, m0, v2
 // W32: encoding: [0x7c,0x04,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, exec_lo, v2
 // W32: encoding: [0x7e,0x04,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, exec_hi, v2
 // W32: encoding: [0x7f,0x04,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, 0, v2
 // W32: encoding: [0x80,0x04,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, -1, v2
 // W32: encoding: [0xc1,0x04,0x16,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, 0.5, v2
 // W32: encoding: [0xff,0x04,0x16,0x7d,0x00,0x38,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, -4.0, v2
 // W32: encoding: [0xff,0x04,0x16,0x7d,0x00,0xc4,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, 0xfe0b, v2
 // W32: encoding: [0xff,0x04,0x16,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, 0x3456, v2
 // W32: encoding: [0xff,0x04,0x16,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16 vcc_lo, v1, v255
 // W32: encoding: [0x01,0xff,0x17,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_i16_e64 s10, v1, v2
 // W32: encoding: [0x0a,0x00,0x8b,0xd4,0x01,0x05,0x02,0x00]
@@ -90756,67 +90756,67 @@ v_cmp_le_i16_e64 s10, v1, -4.0

 v_cmp_gt_i16 vcc_lo, v1, v2
 // W32: encoding: [0x01,0x05,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, v255, v2
 // W32: encoding: [0xff,0x05,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, s1, v2
 // W32: encoding: [0x01,0x04,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, s101, v2
 // W32: encoding: [0x65,0x04,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, vcc_lo, v2
 // W32: encoding: [0x6a,0x04,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, vcc_hi, v2
 // W32: encoding: [0x6b,0x04,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, m0, v2
 // W32: encoding: [0x7c,0x04,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, exec_lo, v2
 // W32: encoding: [0x7e,0x04,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, exec_hi, v2
 // W32: encoding: [0x7f,0x04,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, 0, v2
 // W32: encoding: [0x80,0x04,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, -1, v2
 // W32: encoding: [0xc1,0x04,0x18,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, 0.5, v2
 // W32: encoding: [0xff,0x04,0x18,0x7d,0x00,0x38,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, -4.0, v2
 // W32: encoding: [0xff,0x04,0x18,0x7d,0x00,0xc4,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, 0xfe0b, v2
 // W32: encoding: [0xff,0x04,0x18,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, 0x3456, v2
 // W32: encoding: [0xff,0x04,0x18,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16 vcc_lo, v1, v255
 // W32: encoding: [0x01,0xff,0x19,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_i16_e64 s10, v1, v2
 // W32: encoding: [0x0a,0x00,0x8c,0xd4,0x01,0x05,0x02,0x00]
@@ -90932,67 +90932,67 @@ v_cmp_gt_i16_e64 s10, v1, -4.0

 v_cmp_ne_i16 vcc_lo, v1, v2
 // W32: encoding: [0x01,0x05,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, v255, v2
 // W32: encoding: [0xff,0x05,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, s1, v2
 // W32: encoding: [0x01,0x04,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, s101, v2
 // W32: encoding: [0x65,0x04,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, vcc_lo, v2
 // W32: encoding: [0x6a,0x04,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, vcc_hi, v2
 // W32: encoding: [0x6b,0x04,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, m0, v2
 // W32: encoding: [0x7c,0x04,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, exec_lo, v2
 // W32: encoding: [0x7e,0x04,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, exec_hi, v2
 // W32: encoding: [0x7f,0x04,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, 0, v2
 // W32: encoding: [0x80,0x04,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, -1, v2
 // W32: encoding: [0xc1,0x04,0x1a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, 0.5, v2
 // W32: encoding: [0xff,0x04,0x1a,0x7d,0x00,0x38,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, -4.0, v2
 // W32: encoding: [0xff,0x04,0x1a,0x7d,0x00,0xc4,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, 0xfe0b, v2
 // W32: encoding: [0xff,0x04,0x1a,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, 0x3456, v2
 // W32: encoding: [0xff,0x04,0x1a,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16 vcc_lo, v1, v255
 // W32: encoding: [0x01,0xff,0x1b,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_i16_e64 s10, v1, v2
 // W32: encoding: [0x0a,0x00,0x8d,0xd4,0x01,0x05,0x02,0x00]
@@ -91108,67 +91108,67 @@ v_cmp_ne_i16_e64 s10, v1, -4.0

 v_cmp_ge_i16 vcc_lo, v1, v2
 // W32: encoding: [0x01,0x05,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, v255, v2
 // W32: encoding: [0xff,0x05,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, s1, v2
 // W32: encoding: [0x01,0x04,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, s101, v2
 // W32: encoding: [0x65,0x04,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, vcc_lo, v2
 // W32: encoding: [0x6a,0x04,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, vcc_hi, v2
 // W32: encoding: [0x6b,0x04,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, m0, v2
 // W32: encoding: [0x7c,0x04,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, exec_lo, v2
 // W32: encoding: [0x7e,0x04,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, exec_hi, v2
 // W32: encoding: [0x7f,0x04,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, 0, v2
 // W32: encoding: [0x80,0x04,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, -1, v2
 // W32: encoding: [0xc1,0x04,0x1c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, 0.5, v2
 // W32: encoding: [0xff,0x04,0x1c,0x7d,0x00,0x38,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, -4.0, v2
 // W32: encoding: [0xff,0x04,0x1c,0x7d,0x00,0xc4,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, 0xfe0b, v2
 // W32: encoding: [0xff,0x04,0x1c,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, 0x3456, v2
 // W32: encoding: [0xff,0x04,0x1c,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16 vcc_lo, v1, v255
 // W32: encoding: [0x01,0xff,0x1d,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_i16_e64 s10, v1, v2
 // W32: encoding: [0x0a,0x00,0x8e,0xd4,0x01,0x05,0x02,0x00]
@@ -91932,67 +91932,67 @@ v_cmp_ge_i16_sdwa s6, v1, sext(v2) src0_sel:DWORD src1_sel:DWORD

 v_cmp_class_f16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x1e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x1e,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x1e,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x1f,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0x8f,0xd4,0x01,0x05,0x02,0x00]
@@ -92124,7 +92124,7 @@ v_cmp_class_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

 v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_class_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
 // W64: encoding: [0xf9,0x04,0x1e,0x7d,0xff,0x86,0x06,0x06]
@@ -102988,59 +102988,59 @@ v_cmpx_tru_f16_sdwa v1, |v2| src0_sel:DWORD src1_sel:DWORD

 v_cmp_f_i64 vcc, v[1:2], v[2:3]
 // W64: encoding: [0x01,0x05,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, v[254:255], v[2:3]
 // W64: encoding: [0xfe,0x05,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, s[2:3], v[2:3]
 // W64: encoding: [0x02,0x04,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, s[4:5], v[2:3]
 // W64: encoding: [0x04,0x04,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, s[100:101], v[2:3]
 // W64: encoding: [0x64,0x04,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, vcc, v[2:3]
 // W64: encoding: [0x6a,0x04,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, exec, v[2:3]
 // W64: encoding: [0x7e,0x04,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, 0, v[2:3]
 // W64: encoding: [0x80,0x04,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, -1, v[2:3]
 // W64: encoding: [0xc1,0x04,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, 0.5, v[2:3]
 // W64: encoding: [0xf0,0x04,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, -4.0, v[2:3]
 // W64: encoding: [0xf7,0x04,0x40,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, 0xaf123456, v[2:3]
 // W64: encoding: [0xff,0x04,0x40,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, 0x3f717273, v[2:3]
 // W64: encoding: [0xff,0x04,0x40,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64 vcc, v[1:2], v[254:255]
 // W64: encoding: [0x01,0xfd,0x41,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_i64_e64 s[10:11], v[1:2], v[2:3]
 // W64: encoding: [0x0a,0x00,0xa0,0xd4,0x01,0x05,0x02,0x00]
@@ -103140,59 +103140,59 @@ v_cmp_f_i64_e64 s[10:11], v[1:2], -4.0

 v_cmp_lt_i64 vcc, v[1:2], v[2:3]
 // W64: encoding: [0x01,0x05,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, v[254:255], v[2:3]
 // W64: encoding: [0xfe,0x05,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, s[2:3], v[2:3]
 // W64: encoding: [0x02,0x04,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, s[4:5], v[2:3]
 // W64: encoding: [0x04,0x04,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, s[100:101], v[2:3]
 // W64: encoding: [0x64,0x04,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, vcc, v[2:3]
 // W64: encoding: [0x6a,0x04,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, exec, v[2:3]
 // W64: encoding: [0x7e,0x04,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, 0, v[2:3]
 // W64: encoding: [0x80,0x04,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, -1, v[2:3]
 // W64: encoding: [0xc1,0x04,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, 0.5, v[2:3]
 // W64: encoding: [0xf0,0x04,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, -4.0, v[2:3]
 // W64: encoding: [0xf7,0x04,0x42,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, 0xaf123456, v[2:3]
 // W64: encoding: [0xff,0x04,0x42,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, 0x3f717273, v[2:3]
 // W64: encoding: [0xff,0x04,0x42,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64 vcc, v[1:2], v[254:255]
 // W64: encoding: [0x01,0xfd,0x43,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_i64_e64 s[10:11], v[1:2], v[2:3]
 // W64: encoding: [0x0a,0x00,0xa1,0xd4,0x01,0x05,0x02,0x00]
@@ -103292,59 +103292,59 @@ v_cmp_lt_i64_e64 s[10:11], v[1:2], -4.0

 v_cmp_eq_i64 vcc, v[1:2], v[2:3]
 // W64: encoding: [0x01,0x05,0x44,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i64 vcc, v[254:255], v[2:3]
 // W64: encoding: [0xfe,0x05,0x44,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i64 vcc, s[2:3], v[2:3]
 // W64: encoding: [0x02,0x04,0x44,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i64 vcc, s[4:5], v[2:3]
 // W64: encoding: [0x04,0x04,0x44,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i64 vcc, s[100:101], v[2:3]
 // W64: encoding: [0x64,0x04,0x44,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i64 vcc, vcc, v[2:3]
 // W64: encoding: [0x6a,0x04,0x44,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i64 vcc, exec, v[2:3]
 // W64: encoding: [0x7e,0x04,0x44,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i64 vcc, 0, v[2:3]
 // W64: encoding: [0x80,0x04,0x44,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i64 vcc, -1, v[2:3]
 // W64: encoding: [0xc1,0x04,0x44,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i64 vcc, 0.5, v[2:3]
 // W64: encoding: [0xf0,0x04,0x44,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_i64 vcc, -4.0, v[2:3]
 // W64:
encoding: [0xf7,0x04,0x44,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x44,0x7d,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x44,0x7d,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x45,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0xa2,0xd4,0x01,0x05,0x02,0x00] @@ -103444,59 +103444,59 @@ v_cmp_eq_i64_e64 s[10:11], v[1:2], -4.0 v_cmp_le_i64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x46,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x46,0x7d,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x46,0x7d,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x47,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0xa3,0xd4,0x01,0x05,0x02,0x00] @@ -103596,59 +103596,59 @@ v_cmp_le_i64_e64 s[10:11], v[1:2], -4.0 v_cmp_gt_i64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x48,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x48,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x48,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x48,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x48,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x48,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x48,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x48,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x48,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x48,0x7d] -// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x48,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x48,0x7d,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x48,0x7d,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x49,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0xa4,0xd4,0x01,0x05,0x02,0x00] @@ -103748,59 +103748,59 @@ v_cmp_gt_i64_e64 s[10:11], v[1:2], -4.0 v_cmp_ne_i64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode v_cmp_ne_i64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x4a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x4a,0x7d,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x4a,0x7d,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x4b,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0xa5,0xd4,0x01,0x05,0x02,0x00] @@ -103900,59 +103900,59 @@ v_cmp_ne_i64_e64 s[10:11], v[1:2], -4.0 v_cmp_ge_i64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this 
GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, 0.5, v[2:3] // W64: encoding: [0xf0,0x04,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, -4.0, v[2:3] // W64: encoding: [0xf7,0x04,0x4c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, 0xaf123456, v[2:3] // W64: encoding: [0xff,0x04,0x4c,0x7d,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, 0x3f717273, v[2:3] // W64: encoding: [0xff,0x04,0x4c,0x7d,0x73,0x72,0x71,0x3f] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc, v[1:2], v[254:255] // W64: encoding: [0x01,0xfd,0x4d,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64_e64 s[10:11], v[1:2], v[2:3] // W64: encoding: [0x0a,0x00,0xa6,0xd4,0x01,0x05,0x02,0x00] @@ -104052,59 +104052,59 @@ v_cmp_ge_i64_e64 s[10:11], v[1:2], -4.0 v_cmp_t_i64 vcc, v[1:2], v[2:3] // W64: encoding: [0x01,0x05,0x4e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc, v[254:255], v[2:3] // W64: encoding: [0xfe,0x05,0x4e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc, s[2:3], v[2:3] // W64: encoding: [0x02,0x04,0x4e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc, s[4:5], v[2:3] // W64: encoding: [0x04,0x04,0x4e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc, s[100:101], v[2:3] // W64: encoding: [0x64,0x04,0x4e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc, vcc, v[2:3] // W64: encoding: [0x6a,0x04,0x4e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc, exec, v[2:3] // W64: encoding: [0x7e,0x04,0x4e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc, 0, v[2:3] // W64: encoding: [0x80,0x04,0x4e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc, -1, v[2:3] // W64: encoding: [0xc1,0x04,0x4e,0x7d] 
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_i64 vcc, 0.5, v[2:3]
// W64: encoding: [0xf0,0x04,0x4e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_i64 vcc, -4.0, v[2:3]
// W64: encoding: [0xf7,0x04,0x4e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_i64 vcc, 0xaf123456, v[2:3]
// W64: encoding: [0xff,0x04,0x4e,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_i64 vcc, 0x3f717273, v[2:3]
// W64: encoding: [0xff,0x04,0x4e,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_i64 vcc, v[1:2], v[254:255]
// W64: encoding: [0x01,0xfd,0x4f,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_i64_e64 s[10:11], v[1:2], v[2:3]
// W64: encoding: [0x0a,0x00,0xa7,0xd4,0x01,0x05,0x02,0x00]

@@ -104204,59 +104204,59 @@ v_cmp_t_i64_e64 s[10:11], v[1:2], -4.0

v_cmp_class_f64 vcc, v[1:2], v2
// W64: encoding: [0x01,0x05,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, v[254:255], v2
// W64: encoding: [0xfe,0x05,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, s[2:3], v2
// W64: encoding: [0x02,0x04,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, s[4:5], v2
// W64: encoding: [0x04,0x04,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, s[100:101], v2
// W64: encoding: [0x64,0x04,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, vcc, v2
// W64: encoding: [0x6a,0x04,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, exec, v2
// W64: encoding: [0x7e,0x04,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, 0, v2
// W64: encoding: [0x80,0x04,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, -1, v2
// W64: encoding: [0xc1,0x04,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, 0.5, v2
// W64: encoding: [0xf0,0x04,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, -4.0, v2
// W64: encoding: [0xf7,0x04,0x50,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, 0xaf123456, v2
// W64: encoding: [0xff,0x04,0x50,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, 0x3f717273, v2
// W64: encoding: [0xff,0x04,0x50,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64 vcc, v[1:2], v255
// W64: encoding: [0x01,0xff,0x51,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_class_f64_e64 s[10:11], v[1:2], v2
// W64: encoding: [0x0a,0x00,0xa8,0xd4,0x01,0x05,0x02,0x00]

@@ -104368,59 +104368,59 @@ v_cmp_class_f64_e64 s[10:11], -v[1:2], v2

v_cmp_f_u64 vcc, v[1:2], v[2:3]
// W64: encoding: [0x01,0x05,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, v[254:255], v[2:3]
// W64: encoding: [0xfe,0x05,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, s[2:3], v[2:3]
// W64: encoding: [0x02,0x04,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, s[4:5], v[2:3]
// W64: encoding: [0x04,0x04,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, s[100:101], v[2:3]
// W64: encoding: [0x64,0x04,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, vcc, v[2:3]
// W64: encoding: [0x6a,0x04,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, exec, v[2:3]
// W64: encoding: [0x7e,0x04,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, 0, v[2:3]
// W64: encoding: [0x80,0x04,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, -1, v[2:3]
// W64: encoding: [0xc1,0x04,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, 0.5, v[2:3]
// W64: encoding: [0xf0,0x04,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, -4.0, v[2:3]
// W64: encoding: [0xf7,0x04,0xc0,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, 0xaf123456, v[2:3]
// W64: encoding: [0xff,0x04,0xc0,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, 0x3f717273, v[2:3]
// W64: encoding: [0xff,0x04,0xc0,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64 vcc, v[1:2], v[254:255]
// W64: encoding: [0x01,0xfd,0xc1,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u64_e64 s[10:11], v[1:2], v[2:3]
// W64: encoding: [0x0a,0x00,0xe0,0xd4,0x01,0x05,0x02,0x00]

@@ -104520,59 +104520,59 @@ v_cmp_f_u64_e64 s[10:11], v[1:2], -4.0

v_cmp_lt_u64 vcc, v[1:2], v[2:3]
// W64: encoding: [0x01,0x05,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, v[254:255], v[2:3]
// W64: encoding: [0xfe,0x05,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, s[2:3], v[2:3]
// W64: encoding: [0x02,0x04,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, s[4:5], v[2:3]
// W64: encoding: [0x04,0x04,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, s[100:101], v[2:3]
// W64: encoding: [0x64,0x04,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, vcc, v[2:3]
// W64: encoding: [0x6a,0x04,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, exec, v[2:3]
// W64: encoding: [0x7e,0x04,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, 0, v[2:3]
// W64: encoding: [0x80,0x04,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, -1, v[2:3]
// W64: encoding: [0xc1,0x04,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, 0.5, v[2:3]
// W64: encoding: [0xf0,0x04,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, -4.0, v[2:3]
// W64: encoding: [0xf7,0x04,0xc2,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, 0xaf123456, v[2:3]
// W64: encoding: [0xff,0x04,0xc2,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, 0x3f717273, v[2:3]
// W64: encoding: [0xff,0x04,0xc2,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64 vcc, v[1:2], v[254:255]
// W64: encoding: [0x01,0xfd,0xc3,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u64_e64 s[10:11], v[1:2], v[2:3]
// W64: encoding: [0x0a,0x00,0xe1,0xd4,0x01,0x05,0x02,0x00]

@@ -104672,59 +104672,59 @@ v_cmp_lt_u64_e64 s[10:11], v[1:2], -4.0

v_cmp_eq_u64 vcc, v[1:2], v[2:3]
// W64: encoding: [0x01,0x05,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, v[254:255], v[2:3]
// W64: encoding: [0xfe,0x05,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, s[2:3], v[2:3]
// W64: encoding: [0x02,0x04,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, s[4:5], v[2:3]
// W64: encoding: [0x04,0x04,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, s[100:101], v[2:3]
// W64: encoding: [0x64,0x04,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, vcc, v[2:3]
// W64: encoding: [0x6a,0x04,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, exec, v[2:3]
// W64: encoding: [0x7e,0x04,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, 0, v[2:3]
// W64: encoding: [0x80,0x04,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, -1, v[2:3]
// W64: encoding: [0xc1,0x04,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, 0.5, v[2:3]
// W64: encoding: [0xf0,0x04,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, -4.0, v[2:3]
// W64: encoding: [0xf7,0x04,0xc4,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, 0xaf123456, v[2:3]
// W64: encoding: [0xff,0x04,0xc4,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, 0x3f717273, v[2:3]
// W64: encoding: [0xff,0x04,0xc4,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64 vcc, v[1:2], v[254:255]
// W64: encoding: [0x01,0xfd,0xc5,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u64_e64 s[10:11], v[1:2], v[2:3]
// W64: encoding: [0x0a,0x00,0xe2,0xd4,0x01,0x05,0x02,0x00]

@@ -104824,59 +104824,59 @@ v_cmp_eq_u64_e64 s[10:11], v[1:2], -4.0

v_cmp_le_u64 vcc, v[1:2], v[2:3]
// W64: encoding: [0x01,0x05,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, v[254:255], v[2:3]
// W64: encoding: [0xfe,0x05,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, s[2:3], v[2:3]
// W64: encoding: [0x02,0x04,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, s[4:5], v[2:3]
// W64: encoding: [0x04,0x04,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, s[100:101], v[2:3]
// W64: encoding: [0x64,0x04,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, vcc, v[2:3]
// W64: encoding: [0x6a,0x04,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, exec, v[2:3]
// W64: encoding: [0x7e,0x04,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, 0, v[2:3]
// W64: encoding: [0x80,0x04,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, -1, v[2:3]
// W64: encoding: [0xc1,0x04,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, 0.5, v[2:3]
// W64: encoding: [0xf0,0x04,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, -4.0, v[2:3]
// W64: encoding: [0xf7,0x04,0xc6,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, 0xaf123456, v[2:3]
// W64: encoding: [0xff,0x04,0xc6,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, 0x3f717273, v[2:3]
// W64: encoding: [0xff,0x04,0xc6,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64 vcc, v[1:2], v[254:255]
// W64: encoding: [0x01,0xfd,0xc7,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u64_e64 s[10:11], v[1:2], v[2:3]
// W64: encoding: [0x0a,0x00,0xe3,0xd4,0x01,0x05,0x02,0x00]

@@ -104976,59 +104976,59 @@ v_cmp_le_u64_e64 s[10:11], v[1:2], -4.0

v_cmp_gt_u64 vcc, v[1:2], v[2:3]
// W64: encoding: [0x01,0x05,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, v[254:255], v[2:3]
// W64: encoding: [0xfe,0x05,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, s[2:3], v[2:3]
// W64: encoding: [0x02,0x04,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, s[4:5], v[2:3]
// W64: encoding: [0x04,0x04,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, s[100:101], v[2:3]
// W64: encoding: [0x64,0x04,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, vcc, v[2:3]
// W64: encoding: [0x6a,0x04,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, exec, v[2:3]
// W64: encoding: [0x7e,0x04,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, 0, v[2:3]
// W64: encoding: [0x80,0x04,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, -1, v[2:3]
// W64: encoding: [0xc1,0x04,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, 0.5, v[2:3]
// W64: encoding: [0xf0,0x04,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, -4.0, v[2:3]
// W64: encoding: [0xf7,0x04,0xc8,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, 0xaf123456, v[2:3]
// W64: encoding: [0xff,0x04,0xc8,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, 0x3f717273, v[2:3]
// W64: encoding: [0xff,0x04,0xc8,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64 vcc, v[1:2], v[254:255]
// W64: encoding: [0x01,0xfd,0xc9,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u64_e64 s[10:11], v[1:2], v[2:3]
// W64: encoding: [0x0a,0x00,0xe4,0xd4,0x01,0x05,0x02,0x00]

@@ -105128,59 +105128,59 @@ v_cmp_gt_u64_e64 s[10:11], v[1:2], -4.0

v_cmp_ne_u64 vcc, v[1:2], v[2:3]
// W64: encoding: [0x01,0x05,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, v[254:255], v[2:3]
// W64: encoding: [0xfe,0x05,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, s[2:3], v[2:3]
// W64: encoding: [0x02,0x04,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, s[4:5], v[2:3]
// W64: encoding: [0x04,0x04,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, s[100:101], v[2:3]
// W64: encoding: [0x64,0x04,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, vcc, v[2:3]
// W64: encoding: [0x6a,0x04,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, exec, v[2:3]
// W64: encoding: [0x7e,0x04,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, 0, v[2:3]
// W64: encoding: [0x80,0x04,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, -1, v[2:3]
// W64: encoding: [0xc1,0x04,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, 0.5, v[2:3]
// W64: encoding: [0xf0,0x04,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, -4.0, v[2:3]
// W64: encoding: [0xf7,0x04,0xca,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, 0xaf123456, v[2:3]
// W64: encoding: [0xff,0x04,0xca,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, 0x3f717273, v[2:3]
// W64: encoding: [0xff,0x04,0xca,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64 vcc, v[1:2], v[254:255]
// W64: encoding: [0x01,0xfd,0xcb,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u64_e64 s[10:11], v[1:2], v[2:3]
// W64: encoding: [0x0a,0x00,0xe5,0xd4,0x01,0x05,0x02,0x00]

@@ -105280,59 +105280,59 @@ v_cmp_ne_u64_e64 s[10:11], v[1:2], -4.0

v_cmp_ge_u64 vcc, v[1:2], v[2:3]
// W64: encoding: [0x01,0x05,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, v[254:255], v[2:3]
// W64: encoding: [0xfe,0x05,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, s[2:3], v[2:3]
// W64: encoding: [0x02,0x04,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, s[4:5], v[2:3]
// W64: encoding: [0x04,0x04,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, s[100:101], v[2:3]
// W64: encoding: [0x64,0x04,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, vcc, v[2:3]
// W64: encoding: [0x6a,0x04,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, exec, v[2:3]
// W64: encoding: [0x7e,0x04,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, 0, v[2:3]
// W64: encoding: [0x80,0x04,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, -1, v[2:3]
// W64: encoding: [0xc1,0x04,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, 0.5, v[2:3]
// W64: encoding: [0xf0,0x04,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, -4.0, v[2:3]
// W64: encoding: [0xf7,0x04,0xcc,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, 0xaf123456, v[2:3]
// W64: encoding: [0xff,0x04,0xcc,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, 0x3f717273, v[2:3]
// W64: encoding: [0xff,0x04,0xcc,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64 vcc, v[1:2], v[254:255]
// W64: encoding: [0x01,0xfd,0xcd,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u64_e64 s[10:11], v[1:2], v[2:3]
// W64: encoding: [0x0a,0x00,0xe6,0xd4,0x01,0x05,0x02,0x00]

@@ -105432,59 +105432,59 @@ v_cmp_ge_u64_e64 s[10:11], v[1:2], -4.0

v_cmp_t_u64 vcc, v[1:2], v[2:3]
// W64: encoding: [0x01,0x05,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, v[254:255], v[2:3]
// W64: encoding: [0xfe,0x05,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, s[2:3], v[2:3]
// W64: encoding: [0x02,0x04,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, s[4:5], v[2:3]
// W64: encoding: [0x04,0x04,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, s[100:101], v[2:3]
// W64: encoding: [0x64,0x04,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, vcc, v[2:3]
// W64: encoding: [0x6a,0x04,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, exec, v[2:3]
// W64: encoding: [0x7e,0x04,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, 0, v[2:3]
// W64: encoding: [0x80,0x04,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, -1, v[2:3]
// W64: encoding: [0xc1,0x04,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, 0.5, v[2:3]
// W64: encoding: [0xf0,0x04,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, -4.0, v[2:3]
// W64: encoding: [0xf7,0x04,0xce,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, 0xaf123456, v[2:3]
// W64: encoding: [0xff,0x04,0xce,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, 0x3f717273, v[2:3]
// W64: encoding: [0xff,0x04,0xce,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64 vcc, v[1:2], v[254:255]
// W64: encoding: [0x01,0xfd,0xcf,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u64_e64 s[10:11], v[1:2], v[2:3]
// W64: encoding: [0x0a,0x00,0xe7,0xd4,0x01,0x05,0x02,0x00]

@@ -105584,59 +105584,59 @@ v_cmp_t_u64_e64 s[10:11], v[1:2], -4.0

v_cmp_f_i64 vcc_lo, v[1:2], v[2:3]
// W32: encoding: [0x01,0x05,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, v[254:255], v[2:3]
// W32: encoding: [0xfe,0x05,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, s[2:3], v[2:3]
// W32: encoding: [0x02,0x04,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, s[4:5], v[2:3]
// W32: encoding: [0x04,0x04,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, s[100:101], v[2:3]
// W32: encoding: [0x64,0x04,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, vcc, v[2:3]
// W32: encoding: [0x6a,0x04,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, exec, v[2:3]
// W32: encoding: [0x7e,0x04,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, 0, v[2:3]
// W32: encoding: [0x80,0x04,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, -1, v[2:3]
// W32: encoding: [0xc1,0x04,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, 0.5, v[2:3]
// W32: encoding: [0xf0,0x04,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, -4.0, v[2:3]
// W32: encoding: [0xf7,0x04,0x40,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, 0xaf123456, v[2:3]
// W32: encoding: [0xff,0x04,0x40,0x7d,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, 0x3f717273, v[2:3]
// W32: encoding: [0xff,0x04,0x40,0x7d,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64 vcc_lo, v[1:2], v[254:255]
// W32: encoding: [0x01,0xfd,0x41,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_i64_e64 s10, v[1:2], v[2:3]
// W32: encoding: [0x0a,0x00,0xa0,0xd4,0x01,0x05,0x02,0x00]

@@ -105736,59 +105736,59 @@ v_cmp_f_i64_e64 s10, v[1:2], -4.0

v_cmp_lt_i64 vcc_lo, v[1:2], v[2:3]
// W32: encoding: [0x01,0x05,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, v[254:255], v[2:3]
// W32: encoding: [0xfe,0x05,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, s[2:3], v[2:3]
// W32: encoding: [0x02,0x04,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, s[4:5], v[2:3]
// W32: encoding: [0x04,0x04,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, s[100:101], v[2:3]
// W32: encoding: [0x64,0x04,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, vcc, v[2:3]
// W32: encoding: [0x6a,0x04,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, exec, v[2:3]
// W32: encoding: [0x7e,0x04,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, 0, v[2:3]
// W32: encoding: [0x80,0x04,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, -1, v[2:3]
// W32: encoding: [0xc1,0x04,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, 0.5, v[2:3]
// W32: encoding: [0xf0,0x04,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, -4.0, v[2:3]
// W32: encoding: [0xf7,0x04,0x42,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, 0xaf123456, v[2:3]
// W32: encoding: [0xff,0x04,0x42,0x7d,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, 0x3f717273, v[2:3]
// W32: encoding: [0xff,0x04,0x42,0x7d,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64 vcc_lo, v[1:2], v[254:255]
// W32: encoding: [0x01,0xfd,0x43,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_i64_e64 s10, v[1:2], v[2:3]
// W32: encoding: [0x0a,0x00,0xa1,0xd4,0x01,0x05,0x02,0x00]

@@ -105888,59 +105888,59 @@ v_cmp_lt_i64_e64 s10, v[1:2], -4.0

v_cmp_eq_i64 vcc_lo, v[1:2], v[2:3]
// W32: encoding: [0x01,0x05,0x44,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_i64 vcc_lo, v[254:255], v[2:3]
// W32: encoding: [0xfe,0x05,0x44,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_i64 vcc_lo, s[2:3], v[2:3]
// W32: encoding: [0x02,0x04,0x44,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_i64 vcc_lo, s[4:5], v[2:3]
// W32: encoding: [0x04,0x04,0x44,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR:
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x44,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x44,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x44,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x44,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x44,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x44,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x44,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x44,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x44,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x45,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_i64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xa2,0xd4,0x01,0x05,0x02,0x00] @@ -106040,59 +106040,59 @@ v_cmp_eq_i64_e64 s10, v[1:2], -4.0 v_cmp_le_i64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, s[4:5], v[2:3] // W32: encoding: 
[0x04,0x04,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x46,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x46,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x46,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x47,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_i64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xa3,0xd4,0x01,0x05,0x02,0x00] @@ -106192,59 +106192,59 @@ v_cmp_le_i64_e64 s10, v[1:2], -4.0 v_cmp_gt_i64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x48,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x48,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x48,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x49,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_i64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xa4,0xd4,0x01,0x05,0x02,0x00] @@ -106344,59 +106344,59 @@ v_cmp_gt_i64_e64 s10, v[1:2], -4.0 v_cmp_ne_i64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, s[2:3], v[2:3] // W32: encoding: 
[0x02,0x04,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x4a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x4a,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x4a,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x4b,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_i64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xa5,0xd4,0x01,0x05,0x02,0x00] @@ -106496,59 +106496,59 @@ v_cmp_ne_i64_e64 s10, v[1:2], -4.0 v_cmp_ge_i64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x4c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x4c,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x4c,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x4d,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_i64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xa6,0xd4,0x01,0x05,0x02,0x00] @@ -106648,59 +106648,59 @@ v_cmp_ge_i64_e64 s10, v[1:2], -4.0 v_cmp_t_i64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, v[254:255], v[2:3] // W32: encoding: 
[0xfe,0x05,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0x4e,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0x4e,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0x4e,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0x4f,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_i64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xa7,0xd4,0x01,0x05,0x02,0x00] @@ -106800,59 +106800,59 @@ v_cmp_t_i64_e64 s10, v[1:2], -4.0 v_cmp_class_f64 vcc_lo, v[1:2], v2 // W32: encoding: [0x01,0x05,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, v[254:255], v2 // W32: encoding: [0xfe,0x05,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, s[2:3], v2 // W32: encoding: [0x02,0x04,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, s[4:5], v2 // W32: encoding: [0x04,0x04,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, s[100:101], v2 // W32: encoding: [0x64,0x04,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, vcc, v2 // W32: encoding: [0x6a,0x04,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, exec, v2 // W32: encoding: [0x7e,0x04,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x50,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x50,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x50,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64 vcc_lo, v[1:2], v255 // W32: encoding: [0x01,0xff,0x51,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_class_f64_e64 s10, v[1:2], v2 // W32: encoding: [0x0a,0x00,0xa8,0xd4,0x01,0x05,0x02,0x00] @@ -106964,59 +106964,59 @@ v_cmp_class_f64_e64 s10, -v[1:2], v2 v_cmp_f_u64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0xc0,0x7d] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0xc0,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0xc0,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0xc0,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0xc0,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0xc0,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0xc0,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0xc0,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0xc0,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0xc0,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0xc0,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0xc0,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0xc0,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0xc1,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xe0,0xd4,0x01,0x05,0x02,0x00] @@ -107116,59 +107116,59 @@ v_cmp_f_u64_e64 
s10, v[1:2], -4.0 v_cmp_lt_u64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0xc2,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0xc2,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0xc2,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0xc3,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u64_e64 s10, 
v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xe1,0xd4,0x01,0x05,0x02,0x00] @@ -107268,59 +107268,59 @@ v_cmp_lt_u64_e64 s10, v[1:2], -4.0 v_cmp_eq_u64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0xc4,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0xc4,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0xc4,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0xc5,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on 
this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xe2,0xd4,0x01,0x05,0x02,0x00] @@ -107420,59 +107420,59 @@ v_cmp_eq_u64_e64 s10, v[1:2], -4.0 v_cmp_le_u64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0xc6,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0xc6,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0xc6,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64 vcc_lo, v[1:2], 
v[254:255] // W32: encoding: [0x01,0xfd,0xc7,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xe3,0xd4,0x01,0x05,0x02,0x00] @@ -107572,59 +107572,59 @@ v_cmp_le_u64_e64 s10, v[1:2], -4.0 v_cmp_gt_u64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0xc8,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0xc8,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: [0xff,0x04,0xc8,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64 vcc_lo, v[1:2], v[254:255] // W32: encoding: [0x01,0xfd,0xc9,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u64_e64 s10, v[1:2], v[2:3] // W32: encoding: [0x0a,0x00,0xe4,0xd4,0x01,0x05,0x02,0x00] @@ -107724,59 +107724,59 @@ v_cmp_gt_u64_e64 s10, v[1:2], -4.0 v_cmp_ne_u64 vcc_lo, v[1:2], v[2:3] // W32: encoding: [0x01,0x05,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, v[254:255], v[2:3] // W32: encoding: [0xfe,0x05,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, s[2:3], v[2:3] // W32: encoding: [0x02,0x04,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, s[4:5], v[2:3] // W32: encoding: [0x04,0x04,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, s[100:101], v[2:3] // W32: encoding: [0x64,0x04,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, vcc, v[2:3] // W32: encoding: [0x6a,0x04,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, exec, v[2:3] // W32: encoding: [0x7e,0x04,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, 0, v[2:3] // W32: encoding: [0x80,0x04,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, -1, v[2:3] // W32: encoding: [0xc1,0x04,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, 0.5, v[2:3] // W32: encoding: [0xf0,0x04,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, -4.0, v[2:3] // W32: encoding: [0xf7,0x04,0xca,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, 0xaf123456, v[2:3] // W32: encoding: [0xff,0x04,0xca,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u64 vcc_lo, 0x3f717273, v[2:3] // W32: encoding: 
[0xff,0x04,0xca,0x7d,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u64 vcc_lo, v[1:2], v[254:255]
 // W32: encoding: [0x01,0xfd,0xcb,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u64_e64 s10, v[1:2], v[2:3]
 // W32: encoding: [0x0a,0x00,0xe5,0xd4,0x01,0x05,0x02,0x00]
@@ -107876,59 +107876,59 @@ v_cmp_ne_u64_e64 s10, v[1:2], -4.0

 v_cmp_ge_u64 vcc_lo, v[1:2], v[2:3]
 // W32: encoding: [0x01,0x05,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, v[254:255], v[2:3]
 // W32: encoding: [0xfe,0x05,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, s[2:3], v[2:3]
 // W32: encoding: [0x02,0x04,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, s[4:5], v[2:3]
 // W32: encoding: [0x04,0x04,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, s[100:101], v[2:3]
 // W32: encoding: [0x64,0x04,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, vcc, v[2:3]
 // W32: encoding: [0x6a,0x04,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, exec, v[2:3]
 // W32: encoding: [0x7e,0x04,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, 0, v[2:3]
 // W32: encoding: [0x80,0x04,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, -1, v[2:3]
 // W32: encoding: [0xc1,0x04,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, 0.5, v[2:3]
 // W32: encoding: [0xf0,0x04,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, -4.0, v[2:3]
 // W32: encoding: [0xf7,0x04,0xcc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, 0xaf123456, v[2:3]
 // W32: encoding: [0xff,0x04,0xcc,0x7d,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, 0x3f717273, v[2:3]
 // W32: encoding: [0xff,0x04,0xcc,0x7d,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64 vcc_lo, v[1:2], v[254:255]
 // W32: encoding: [0x01,0xfd,0xcd,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u64_e64 s10, v[1:2], v[2:3]
 // W32: encoding: [0x0a,0x00,0xe6,0xd4,0x01,0x05,0x02,0x00]
@@ -108028,59 +108028,59 @@ v_cmp_ge_u64_e64 s10, v[1:2], -4.0

 v_cmp_t_u64 vcc_lo, v[1:2], v[2:3]
 // W32: encoding: [0x01,0x05,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, v[254:255], v[2:3]
 // W32: encoding: [0xfe,0x05,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, s[2:3], v[2:3]
 // W32: encoding: [0x02,0x04,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, s[4:5], v[2:3]
 // W32: encoding: [0x04,0x04,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, s[100:101], v[2:3]
 // W32: encoding: [0x64,0x04,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, vcc, v[2:3]
 // W32: encoding: [0x6a,0x04,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, exec, v[2:3]
 // W32: encoding: [0x7e,0x04,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, 0, v[2:3]
 // W32: encoding: [0x80,0x04,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, -1, v[2:3]
 // W32: encoding: [0xc1,0x04,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, 0.5, v[2:3]
 // W32: encoding: [0xf0,0x04,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, -4.0, v[2:3]
 // W32: encoding: [0xf7,0x04,0xce,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, 0xaf123456, v[2:3]
 // W32: encoding: [0xff,0x04,0xce,0x7d,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, 0x3f717273, v[2:3]
 // W32: encoding: [0xff,0x04,0xce,0x7d,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64 vcc_lo, v[1:2], v[254:255]
 // W32: encoding: [0x01,0xfd,0xcf,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u64_e64 s10, v[1:2], v[2:3]
 // W32: encoding: [0x0a,0x00,0xe7,0xd4,0x01,0x05,0x02,0x00]
@@ -108204,67 +108204,67 @@ v_cmpx_t_u64_e64 exec, v[2:3]

 v_cmp_lt_u16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x52,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x52,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x52,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x52,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x52,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x53,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xa9,0xd4,0x01,0x05,0x02,0x00]
@@ -108380,67 +108380,67 @@ v_cmp_lt_u16_e64 s[10:11], v1, -4.0

 v_cmp_eq_u16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x54,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x54,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x54,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x54,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x54,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x55,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xaa,0xd4,0x01,0x05,0x02,0x00]
@@ -108556,67 +108556,67 @@ v_cmp_eq_u16_e64 s[10:11], v1, -4.0

 v_cmp_le_u16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x56,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x56,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x56,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x56,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x56,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x57,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xab,0xd4,0x01,0x05,0x02,0x00]
@@ -108732,67 +108732,67 @@ v_cmp_le_u16_e64 s[10:11], v1, -4.0

 v_cmp_gt_u16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x58,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x58,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x58,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x58,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x58,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x59,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xac,0xd4,0x01,0x05,0x02,0x00]
@@ -108908,67 +108908,67 @@ v_cmp_gt_u16_e64 s[10:11], v1, -4.0

 v_cmp_ne_u16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x5a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x5a,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x5a,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x5a,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x5a,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x5b,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xad,0xd4,0x01,0x05,0x02,0x00]
@@ -109084,67 +109084,67 @@ v_cmp_ne_u16_e64 s[10:11], v1, -4.0

 v_cmp_ge_u16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x5c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, 0.5, v2
 // W64: encoding: [0xff,0x04,0x5c,0x7d,0x00,0x38,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, -4.0, v2
 // W64: encoding: [0xff,0x04,0x5c,0x7d,0x00,0xc4,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x5c,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x5c,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x5d,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xae,0xd4,0x01,0x05,0x02,0x00]
@@ -109260,67 +109260,67 @@ v_cmp_ge_u16_e64 s[10:11], v1, -4.0

 v_cmp_f_u32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x80,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x80,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x80,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x81,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_u32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xc0,0xd4,0x01,0x05,0x02,0x00]
@@ -109436,67 +109436,67 @@ v_cmp_f_u32_e64 s[10:11], v1, -4.0

 v_cmp_lt_u32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x82,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x82,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x82,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x83,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_u32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xc1,0xd4,0x01,0x05,0x02,0x00]
@@ -109612,67 +109612,67 @@ v_cmp_lt_u32_e64 s[10:11], v1, -4.0

 v_cmp_eq_u32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x84,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x84,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x84,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x85,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_u32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xc2,0xd4,0x01,0x05,0x02,0x00]
@@ -109788,67 +109788,67 @@ v_cmp_eq_u32_e64 s[10:11], v1, -4.0

 v_cmp_le_u32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x86,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x86,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x86,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x87,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_le_u32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xc3,0xd4,0x01,0x05,0x02,0x00]
@@ -109964,67 +109964,67 @@ v_cmp_le_u32_e64 s[10:11], v1, -4.0

 v_cmp_gt_u32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x88,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x88,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x88,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x89,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_gt_u32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xc4,0xd4,0x01,0x05,0x02,0x00]
@@ -110140,67 +110140,67 @@ v_cmp_gt_u32_e64 s[10:11], v1, -4.0

 v_cmp_ne_u32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x8a,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x8a,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x8a,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x8b,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ne_u32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xc5,0xd4,0x01,0x05,0x02,0x00]
@@ -110316,67 +110316,67 @@ v_cmp_ne_u32_e64 s[10:11], v1, -4.0

 v_cmp_ge_u32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x8c,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x8c,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x8c,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x8d,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_ge_u32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xc6,0xd4,0x01,0x05,0x02,0x00]
@@ -110492,67 +110492,67 @@ v_cmp_ge_u32_e64 s[10:11], v1, -4.0

 v_cmp_t_u32 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x8e,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, 0xaf123456, v2
 // W64: encoding: [0xff,0x04,0x8e,0x7d,0x56,0x34,0x12,0xaf]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, 0x3f717273, v2
 // W64: encoding: [0xff,0x04,0x8e,0x7d,0x73,0x72,0x71,0x3f]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x8f,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_t_u32_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xc7,0xd4,0x01,0x05,0x02,0x00]
@@ -110668,67 +110668,67 @@ v_cmp_t_u32_e64 s[10:11], v1, -4.0

 v_cmp_f_f16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x90,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x90,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x90,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x91,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_f_f16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xc8,0xd4,0x01,0x05,0x02,0x00]
@@ -110860,67 +110860,67 @@ v_cmp_f_f16_e64 s[10:11], v1, v2 clamp

 v_cmp_lt_f16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, exec_hi, v2
 // W64: encoding: [0x7f,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, 0, v2
 // W64: encoding: [0x80,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, -1, v2
 // W64: encoding: [0xc1,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, 0.5, v2
 // W64: encoding: [0xf0,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, -4.0, v2
 // W64: encoding: [0xf7,0x04,0x92,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, 0xfe0b, v2
 // W64: encoding: [0xff,0x04,0x92,0x7d,0x0b,0xfe,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, 0x3456, v2
 // W64: encoding: [0xff,0x04,0x92,0x7d,0x56,0x34,0x00,0x00]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16 vcc, v1, v255
 // W64: encoding: [0x01,0xff,0x93,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_lt_f16_e64 s[10:11], v1, v2
 // W64: encoding: [0x0a,0x00,0xc9,0xd4,0x01,0x05,0x02,0x00]
@@ -111052,67 +111052,67 @@ v_cmp_lt_f16_e64 s[10:11], v1, v2 clamp

 v_cmp_eq_f16 vcc, v1, v2
 // W64: encoding: [0x01,0x05,0x94,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_f16 vcc, v255, v2
 // W64: encoding: [0xff,0x05,0x94,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_f16 vcc, s1, v2
 // W64: encoding: [0x01,0x04,0x94,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_f16 vcc, s101, v2
 // W64: encoding: [0x65,0x04,0x94,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_f16 vcc, vcc_lo, v2
 // W64: encoding: [0x6a,0x04,0x94,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_f16 vcc, vcc_hi, v2
 // W64: encoding: [0x6b,0x04,0x94,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_f16 vcc, m0, v2
 // W64: encoding: [0x7c,0x04,0x94,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

 v_cmp_eq_f16 vcc, exec_lo, v2
 // W64: encoding: [0x7e,0x04,0x94,0x7d]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
v_cmp_eq_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x94,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x94,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x94,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x94,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x94,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0x94,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0x94,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x95,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xca,0xd4,0x01,0x05,0x02,0x00] @@ -111244,67 +111244,67 @@ v_cmp_eq_f16_e64 s[10:11], v1, v2 clamp v_cmp_le_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
v_cmp_le_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x96,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0x96,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0x96,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x97,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xcb,0xd4,0x01,0x05,0x02,0x00] @@ -111436,67 +111436,67 @@ v_cmp_le_f16_e64 s[10:11], v1, v2 clamp v_cmp_gt_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
v_cmp_gt_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x98,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0x98,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0x98,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x99,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xcc,0xd4,0x01,0x05,0x02,0x00] @@ -111628,67 +111628,67 @@ v_cmp_gt_f16_e64 s[10:11], v1, v2 clamp v_cmp_lg_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
v_cmp_lg_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x9a,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0x9a,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0x9a,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x9b,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xcd,0xd4,0x01,0x05,0x02,0x00] @@ -111820,67 +111820,67 @@ v_cmp_lg_f16_e64 s[10:11], v1, v2 clamp 
v_cmp_ge_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x9c,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0x9c,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0x9c,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x9d,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xce,0xd4,0x01,0x05,0x02,0x00] @@ -112012,67 +112012,67 @@ v_cmp_ge_f16_e64 s[10:11], v1, v2 clamp v_cmp_o_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0x9e,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode v_cmp_o_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0x9e,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0x9e,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0x9f,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xcf,0xd4,0x01,0x05,0x02,0x00] @@ -112204,67 +112204,67 @@ v_cmp_o_f16_e64 s[10:11], v1, v2 clamp v_cmp_u_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
v_cmp_u_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0xd0,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0xd0,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0xd0,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0xd1,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xe8,0xd4,0x01,0x05,0x02,0x00] @@ -112396,67 +112396,67 @@ v_cmp_u_f16_e64 s[10:11], v1, v2 clamp v_cmp_nge_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode v_cmp_nge_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0xd2,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0xd2,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0xd2,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0xd3,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xe9,0xd4,0x01,0x05,0x02,0x00] @@ -112588,67 +112588,67 @@ v_cmp_nge_f16_e64 s[10:11], v1, v2 clamp v_cmp_nlg_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode v_cmp_nlg_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0xd4,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0xd4,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0xd4,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0xd5,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xea,0xd4,0x01,0x05,0x02,0x00] @@ -112780,67 +112780,67 @@ v_cmp_nlg_f16_e64 s[10:11], v1, v2 clamp v_cmp_ngt_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode v_cmp_ngt_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0xd6,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0xd6,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0xd6,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0xd7,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xeb,0xd4,0x01,0x05,0x02,0x00] @@ -112972,67 +112972,67 @@ v_cmp_ngt_f16_e64 s[10:11], v1, v2 clamp v_cmp_nle_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0xd8,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0xd8,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0xd8,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0xd9,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xec,0xd4,0x01,0x05,0x02,0x00] @@ -113164,67 +113164,67 @@ v_cmp_nle_f16_e64 s[10:11], v1, v2 clamp v_cmp_neq_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0xda,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0xda,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0xda,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0xdb,0x7d] -// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xed,0xd4,0x01,0x05,0x02,0x00] @@ -113356,67 +113356,67 @@ v_cmp_neq_f16_e64 s[10:11], v1, v2 clamp v_cmp_nlt_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0xdc,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0xdc,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0xdc,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0xdd,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xee,0xd4,0x01,0x05,0x02,0x00] @@ -113548,67 +113548,67 @@ v_cmp_nlt_f16_e64 s[10:11], v1, v2 clamp v_cmp_tru_f16 vcc, v1, v2 // W64: encoding: [0x01,0x05,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, v255, v2 // W64: encoding: [0xff,0x05,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, s1, v2 // W64: encoding: [0x01,0x04,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, s101, v2 // W64: encoding: [0x65,0x04,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, vcc_lo, v2 // W64: encoding: [0x6a,0x04,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, vcc_hi, v2 // W64: encoding: [0x6b,0x04,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, m0, v2 // W64: encoding: [0x7c,0x04,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, exec_lo, v2 // W64: encoding: [0x7e,0x04,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, exec_hi, v2 // W64: encoding: [0x7f,0x04,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, 0, v2 // W64: encoding: [0x80,0x04,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, -1, v2 // W64: encoding: [0xc1,0x04,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, 0.5, v2 // W64: encoding: [0xf0,0x04,0xde,0x7d] -// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, -4.0, v2 // W64: encoding: [0xf7,0x04,0xde,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, 0xfe0b, v2 // W64: encoding: [0xff,0x04,0xde,0x7d,0x0b,0xfe,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, 0x3456, v2 // W64: encoding: [0xff,0x04,0xde,0x7d,0x56,0x34,0x00,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16 vcc, v1, v255 // W64: encoding: [0x01,0xff,0xdf,0x7d] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16_e64 s[10:11], v1, v2 // W64: encoding: [0x0a,0x00,0xef,0xd4,0x01,0x05,0x02,0x00] @@ -113740,67 +113740,67 @@ v_cmp_tru_f16_e64 s[10:11], v1, v2 clamp v_cmp_lt_u16 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, 0, v2 // W32: encoding: 
[0x80,0x04,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x52,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, 0.5, v2 // W32: encoding: [0xff,0x04,0x52,0x7d,0x00,0x38,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, -4.0, v2 // W32: encoding: [0xff,0x04,0x52,0x7d,0x00,0xc4,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, 0xfe0b, v2 // W32: encoding: [0xff,0x04,0x52,0x7d,0x0b,0xfe,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, 0x3456, v2 // W32: encoding: [0xff,0x04,0x52,0x7d,0x56,0x34,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x53,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u16_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xa9,0xd4,0x01,0x05,0x02,0x00] @@ -113916,67 +113916,67 @@ v_cmp_lt_u16_e64 s10, v1, -4.0 v_cmp_eq_u16 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode v_cmp_eq_u16 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x54,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, 0.5, v2 // W32: encoding: [0xff,0x04,0x54,0x7d,0x00,0x38,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, -4.0, v2 // W32: encoding: [0xff,0x04,0x54,0x7d,0x00,0xc4,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, 0xfe0b, v2 // W32: encoding: [0xff,0x04,0x54,0x7d,0x0b,0xfe,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, 0x3456, v2 // W32: encoding: [0xff,0x04,0x54,0x7d,0x56,0x34,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x55,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u16_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xaa,0xd4,0x01,0x05,0x02,0x00] @@ -114092,67 +114092,67 @@ v_cmp_eq_u16_e64 s10, v1, -4.0 v_cmp_le_u16 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x56,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, 0.5, v2 // W32: encoding: [0xff,0x04,0x56,0x7d,0x00,0x38,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, -4.0, v2 // W32: encoding: [0xff,0x04,0x56,0x7d,0x00,0xc4,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, 0xfe0b, v2 // W32: encoding: [0xff,0x04,0x56,0x7d,0x0b,0xfe,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, 0x3456, v2 // W32: encoding: [0xff,0x04,0x56,0x7d,0x56,0x34,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x57,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u16_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xab,0xd4,0x01,0x05,0x02,0x00] @@ -114268,67 +114268,67 @@ v_cmp_le_u16_e64 s10, v1, -4.0 v_cmp_gt_u16 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x58,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x58,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x58,0x7d] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x58,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x58,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x58,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x58,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x58,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x58,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x58,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x58,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, 0.5, v2 // W32: encoding: [0xff,0x04,0x58,0x7d,0x00,0x38,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, -4.0, v2 // W32: encoding: [0xff,0x04,0x58,0x7d,0x00,0xc4,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, 0xfe0b, v2 // W32: encoding: [0xff,0x04,0x58,0x7d,0x0b,0xfe,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, 0x3456, v2 // W32: encoding: [0xff,0x04,0x58,0x7d,0x56,0x34,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x59,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u16_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xac,0xd4,0x01,0x05,0x02,0x00] @@ -114444,67 +114444,67 @@ v_cmp_gt_u16_e64 s10, v1, -4.0 v_cmp_ne_u16 
vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x5a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, 0.5, v2 // W32: encoding: [0xff,0x04,0x5a,0x7d,0x00,0x38,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, -4.0, v2 // W32: encoding: [0xff,0x04,0x5a,0x7d,0x00,0xc4,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, 0xfe0b, v2 // W32: encoding: [0xff,0x04,0x5a,0x7d,0x0b,0xfe,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, 0x3456, v2 // W32: encoding: [0xff,0x04,0x5a,0x7d,0x56,0x34,0x00,0x00] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x5b,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u16_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xad,0xd4,0x01,0x05,0x02,0x00] @@ -114620,67 +114620,67 @@ v_cmp_ne_u16_e64 s10, v1, -4.0 v_cmp_ge_u16 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x5c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, 0.5, v2 // W32: encoding: [0xff,0x04,0x5c,0x7d,0x00,0x38,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, -4.0, v2 // W32: encoding: 
[0xff,0x04,0x5c,0x7d,0x00,0xc4,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, 0xfe0b, v2 // W32: encoding: [0xff,0x04,0x5c,0x7d,0x0b,0xfe,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, 0x3456, v2 // W32: encoding: [0xff,0x04,0x5c,0x7d,0x56,0x34,0x00,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x5d,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u16_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xae,0xd4,0x01,0x05,0x02,0x00] @@ -114796,67 +114796,67 @@ v_cmp_ge_u16_e64 s10, v1, -4.0 v_cmp_f_u32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 
vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x80,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x80,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x80,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x81,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_u32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xc0,0xd4,0x01,0x05,0x02,0x00] @@ -114972,67 +114972,67 @@ v_cmp_f_u32_e64 s10, v1, -4.0 v_cmp_lt_u32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode v_cmp_lt_u32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x82,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x82,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x82,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x83,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_u32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xc1,0xd4,0x01,0x05,0x02,0x00] @@ -115148,67 +115148,67 @@ v_cmp_lt_u32_e64 s10, v1, -4.0 v_cmp_eq_u32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x84,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x84,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x84,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x85,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xc2,0xd4,0x01,0x05,0x02,0x00] @@ -115324,67 +115324,67 @@ v_cmp_eq_u32_e64 s10, v1, -4.0 v_cmp_le_u32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x86,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x86,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x86,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x87,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xc3,0xd4,0x01,0x05,0x02,0x00] @@ -115500,67 +115500,67 @@ v_cmp_le_u32_e64 s10, v1, -4.0 v_cmp_gt_u32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not 
supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x88,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x88,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x88,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x89,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
v_cmp_gt_u32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xc4,0xd4,0x01,0x05,0x02,0x00] @@ -115676,67 +115676,67 @@ v_cmp_gt_u32_e64 s10, v1, -4.0 v_cmp_ne_u32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, -4.0, v2 // W32: encoding: [0xf7,0x04,0x8a,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, 0xaf123456, v2 // W32: encoding: [0xff,0x04,0x8a,0x7d,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode v_cmp_ne_u32 vcc_lo, 0x3f717273, v2 // W32: encoding: [0xff,0x04,0x8a,0x7d,0x73,0x72,0x71,0x3f] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32 vcc_lo, v1, v255 // W32: encoding: [0x01,0xff,0x8b,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32_e64 s10, v1, v2 // W32: encoding: [0x0a,0x00,0xc5,0xd4,0x01,0x05,0x02,0x00] @@ -115852,67 +115852,67 @@ v_cmp_ne_u32_e64 s10, v1, -4.0 v_cmp_ge_u32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, v255, v2 // W32: encoding: [0xff,0x05,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, s1, v2 // W32: encoding: [0x01,0x04,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, s101, v2 // W32: encoding: [0x65,0x04,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, vcc_lo, v2 // W32: encoding: [0x6a,0x04,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, vcc_hi, v2 // W32: encoding: [0x6b,0x04,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, m0, v2 // W32: encoding: [0x7c,0x04,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, exec_lo, v2 // W32: encoding: [0x7e,0x04,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, exec_hi, v2 // W32: encoding: [0x7f,0x04,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, 0, v2 // W32: encoding: [0x80,0x04,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, -1, v2 // W32: encoding: [0xc1,0x04,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32 vcc_lo, 0.5, v2 // W32: encoding: [0xf0,0x04,0x8c,0x7d] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode

v_cmp_ge_u32 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x8c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u32 vcc_lo, 0xaf123456, v2
// W32: encoding: [0xff,0x04,0x8c,0x7d,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u32 vcc_lo, 0x3f717273, v2
// W32: encoding: [0xff,0x04,0x8c,0x7d,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u32 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x8d,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u32_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xc6,0xd4,0x01,0x05,0x02,0x00]

@@ -116028,67 +116028,67 @@ v_cmp_ge_u32_e64 s10, v1, -4.0

v_cmp_t_u32 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x8e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, 0xaf123456, v2
// W32: encoding: [0xff,0x04,0x8e,0x7d,0x56,0x34,0x12,0xaf]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, 0x3f717273, v2
// W32: encoding: [0xff,0x04,0x8e,0x7d,0x73,0x72,0x71,0x3f]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x8f,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_t_u32_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xc7,0xd4,0x01,0x05,0x02,0x00]

@@ -116204,67 +116204,67 @@ v_cmp_t_u32_e64 s10, v1, -4.0

v_cmp_f_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x90,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0x90,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0x90,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x91,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xc8,0xd4,0x01,0x05,0x02,0x00]

@@ -116396,67 +116396,67 @@ v_cmp_f_f16_e64 s10, v1, v2 clamp

v_cmp_lt_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x92,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0x92,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0x92,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x93,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xc9,0xd4,0x01,0x05,0x02,0x00]

@@ -116588,67 +116588,67 @@ v_cmp_lt_f16_e64 s10, v1, v2 clamp

v_cmp_eq_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x94,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0x94,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0x94,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x95,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xca,0xd4,0x01,0x05,0x02,0x00]

@@ -116780,67 +116780,67 @@ v_cmp_eq_f16_e64 s10, v1, v2 clamp

v_cmp_le_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x96,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0x96,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0x96,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x97,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xcb,0xd4,0x01,0x05,0x02,0x00]

@@ -116972,67 +116972,67 @@ v_cmp_le_f16_e64 s10, v1, v2 clamp

v_cmp_gt_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x98,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0x98,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0x98,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x99,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xcc,0xd4,0x01,0x05,0x02,0x00]

@@ -117164,67 +117164,67 @@ v_cmp_gt_f16_e64 s10, v1, v2 clamp

v_cmp_lg_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x9a,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0x9a,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0x9a,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x9b,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lg_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xcd,0xd4,0x01,0x05,0x02,0x00]

@@ -117356,67 +117356,67 @@ v_cmp_lg_f16_e64 s10, v1, v2 clamp

v_cmp_ge_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x9c,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0x9c,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0x9c,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x9d,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xce,0xd4,0x01,0x05,0x02,0x00]

@@ -117548,67 +117548,67 @@ v_cmp_ge_f16_e64 s10, v1, v2 clamp

v_cmp_o_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0x9e,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0x9e,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0x9e,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0x9f,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_o_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xcf,0xd4,0x01,0x05,0x02,0x00]

@@ -117740,67 +117740,67 @@ v_cmp_o_f16_e64 s10, v1, v2 clamp

v_cmp_u_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0xd0,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0xd0,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0xd0,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0xd1,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_u_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xe8,0xd4,0x01,0x05,0x02,0x00]

@@ -117932,67 +117932,67 @@ v_cmp_u_f16_e64 s10, v1, v2 clamp

v_cmp_nge_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0xd2,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0xd2,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0xd2,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0xd3,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nge_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xe9,0xd4,0x01,0x05,0x02,0x00]

@@ -118124,67 +118124,67 @@ v_cmp_nge_f16_e64 s10, v1, v2 clamp

v_cmp_nlg_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
v_cmp_nlg_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0xd4,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0xd4,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0xd4,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0xd5,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlg_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xea,0xd4,0x01,0x05,0x02,0x00]

@@ -118316,67 +118316,67 @@ v_cmp_nlg_f16_e64 s10, v1, v2 clamp

v_cmp_ngt_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0xd6,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0xd6,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0xd6,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0xd7,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ngt_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xeb,0xd4,0x01,0x05,0x02,0x00]

@@ -118508,67 +118508,67 @@ v_cmp_ngt_f16_e64 s10, v1, v2 clamp

v_cmp_nle_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0xd8,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0xd8,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0xd8,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0xd9,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nle_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xec,0xd4,0x01,0x05,0x02,0x00]

@@ -118700,67 +118700,67 @@ v_cmp_nle_f16_e64 s10, v1, v2 clamp

v_cmp_neq_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
v_cmp_neq_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0xda,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0xda,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0xda,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0xdb,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_neq_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xed,0xd4,0x01,0x05,0x02,0x00]

@@ -118892,67 +118892,67 @@ v_cmp_neq_f16_e64 s10, v1, v2 clamp

v_cmp_nlt_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0xdc,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0xdc,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0xdc,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0xdd,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_nlt_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xee,0xd4,0x01,0x05,0x02,0x00]

@@ -119084,67 +119084,67 @@ v_cmp_nlt_f16_e64 s10, v1, v2 clamp

v_cmp_tru_f16 vcc_lo, v1, v2
// W32: encoding: [0x01,0x05,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, v255, v2
// W32: encoding: [0xff,0x05,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, s1, v2
// W32: encoding: [0x01,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, s101, v2
// W32: encoding: [0x65,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, vcc_lo, v2
// W32: encoding: [0x6a,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, vcc_hi, v2
// W32: encoding: [0x6b,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, m0, v2
// W32: encoding: [0x7c,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, exec_lo, v2
// W32: encoding: [0x7e,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, exec_hi, v2
// W32: encoding: [0x7f,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, 0, v2
// W32: encoding: [0x80,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, -1, v2
// W32: encoding: [0xc1,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, 0.5, v2
// W32: encoding: [0xf0,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, -4.0, v2
// W32: encoding: [0xf7,0x04,0xde,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, 0xfe0b, v2
// W32: encoding: [0xff,0x04,0xde,0x7d,0x0b,0xfe,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, 0x3456, v2
// W32: encoding: [0xff,0x04,0xde,0x7d,0x56,0x34,0x00,0x00]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16 vcc_lo, v1, v255
// W32: encoding: [0x01,0xff,0xdf,0x7d]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_tru_f16_e64 s10, v1, v2
// W32: encoding: [0x0a,0x00,0xef,0xd4,0x01,0x05,0x02,0x00]

@@ -119288,7 +119288,7 @@ v_cmp_lt_u16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_lt_u16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x52,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x52,0x7d,0xff,0x86,0x06,0x06]

@@ -119396,7 +119396,7 @@ v_cmp_eq_u16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_eq_u16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x54,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_eq_u16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x54,0x7d,0xff,0x86,0x06,0x06]

@@ -119504,7 +119504,7 @@ v_cmp_le_u16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_le_u16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x56,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_le_u16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x56,0x7d,0xff,0x86,0x06,0x06]

@@ -119612,7 +119612,7 @@ v_cmp_gt_u16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_gt_u16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x58,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_gt_u16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x58,0x7d,0xff,0x86,0x06,0x06]

@@ -119720,7 +119720,7 @@ v_cmp_ne_u16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_ne_u16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x5a,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ne_u16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x5a,0x7d,0xff,0x86,0x06,0x06]

@@ -119828,7 +119828,7 @@ v_cmp_ge_u16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_ge_u16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x5c,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_ge_u16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x5c,0x7d,0xff,0x86,0x06,0x06]

@@ -119936,7 +119936,7 @@ v_cmp_f_u32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_f_u32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x80,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_f_u32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x80,0x7d,0xff,0x86,0x06,0x06]

@@ -120044,7 +120044,7 @@ v_cmp_lt_u32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_lt_u32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x82,0x7d,0x01,0x00,0x06,0x06]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

v_cmp_lt_u32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD
// W64: encoding: [0xf9,0x04,0x82,0x7d,0xff,0x86,0x06,0x06]

@@ -120152,7 +120152,7 @@ v_cmp_eq_u32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD

v_cmp_eq_u32_sdwa vcc, v1, v2
src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x84,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_u32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x84,0x7d,0xff,0x86,0x06,0x06] @@ -120260,7 +120260,7 @@ v_cmp_le_u32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_le_u32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x86,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_u32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x86,0x7d,0xff,0x86,0x06,0x06] @@ -120368,7 +120368,7 @@ v_cmp_gt_u32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_gt_u32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x88,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_u32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x88,0x7d,0xff,0x86,0x06,0x06] @@ -120476,7 +120476,7 @@ v_cmp_ne_u32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_ne_u32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x8a,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ne_u32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x8a,0x7d,0xff,0x86,0x06,0x06] @@ -120584,7 +120584,7 @@ v_cmp_ge_u32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_ge_u32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x8c,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_u32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x8c,0x7d,0xff,0x86,0x06,0x06] @@ -120692,7 +120692,7 @@ v_cmp_t_u32_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_t_u32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x8e,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_t_u32_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x8e,0x7d,0xff,0x86,0x06,0x06] @@ -120800,7 +120800,7 @@ v_cmp_f_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_f_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x90,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_f_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x90,0x7d,0xff,0x86,0x06,0x06] @@ -120916,7 +120916,7 @@ v_cmp_lt_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_lt_f16_sdwa 
vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x92,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lt_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x92,0x7d,0xff,0x86,0x06,0x06] @@ -121032,7 +121032,7 @@ v_cmp_eq_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_eq_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x94,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_eq_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x94,0x7d,0xff,0x86,0x06,0x06] @@ -121148,7 +121148,7 @@ v_cmp_le_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_le_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x96,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_le_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x96,0x7d,0xff,0x86,0x06,0x06] @@ -121264,7 +121264,7 @@ v_cmp_gt_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_gt_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x98,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_gt_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x98,0x7d,0xff,0x86,0x06,0x06] @@ -121380,7 +121380,7 @@ v_cmp_lg_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_lg_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x9a,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_lg_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x9a,0x7d,0xff,0x86,0x06,0x06] @@ -121496,7 +121496,7 @@ v_cmp_ge_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_ge_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x9c,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ge_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x9c,0x7d,0xff,0x86,0x06,0x06] @@ -121612,7 +121612,7 @@ v_cmp_o_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x9e,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_o_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0x9e,0x7d,0xff,0x86,0x06,0x06] @@ -121728,7 +121728,7 @@ v_cmp_u_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD 
v_cmp_u_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xd0,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_u_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xd0,0x7d,0xff,0x86,0x06,0x06] @@ -121844,7 +121844,7 @@ v_cmp_nge_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_nge_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xd2,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nge_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xd2,0x7d,0xff,0x86,0x06,0x06] @@ -121960,7 +121960,7 @@ v_cmp_nlg_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_nlg_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xd4,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlg_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xd4,0x7d,0xff,0x86,0x06,0x06] @@ -122076,7 +122076,7 @@ v_cmp_ngt_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_ngt_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xd6,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_ngt_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xd6,0x7d,0xff,0x86,0x06,0x06] @@ -122192,7 +122192,7 @@ v_cmp_nle_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_nle_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xd8,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nle_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xd8,0x7d,0xff,0x86,0x06,0x06] @@ -122308,7 +122308,7 @@ v_cmp_neq_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_neq_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xda,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_neq_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xda,0x7d,0xff,0x86,0x06,0x06] @@ -122424,7 +122424,7 @@ v_cmp_nlt_f16_sdwa s[100:101], v1, v2 src0_sel:DWORD src1_sel:DWORD v_cmp_nlt_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xdc,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_nlt_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xdc,0x7d,0xff,0x86,0x06,0x06] @@ -122540,7 +122540,7 @@ v_cmp_tru_f16_sdwa s[100:101], v1, 
v2 src0_sel:DWORD src1_sel:DWORD v_cmp_tru_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xde,0x7d,0x01,0x00,0x06,0x06] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_cmp_tru_f16_sdwa s[6:7], v255, v2 src0_sel:DWORD src1_sel:DWORD // W64: encoding: [0xf9,0x04,0xde,0x7d,0xff,0x86,0x06,0x06] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s index 01159c365ebc2..6fb8cf991d7c5 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s @@ -317,27 +317,27 @@ v_mac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // W32: [0xfa,0x04,0x0a,0x50,0x01,0x1b,0x00,0x00] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // W32: [0xfa,0x04,0x0a,0x52,0x01,0x1b,0x00,0x00] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // W32: [0xfa,0x04,0x0a,0x54,0x01,0x1b,0x00,0x00] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // W64: [0xfa,0x04,0x0a,0x50,0x01,0x1b,0x00,0x00] -// W32-ERR: error: instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // W64: [0xfa,0x04,0x0a,0x52,0x01,0x1b,0x00,0x00] -// W32-ERR: error: instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // W64: [0xfa,0x04,0x0a,0x54,0x01,0x1b,0x00,0x00] -// W32-ERR: error: instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // GFX10: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0x00] @@ -605,27 +605,27 @@ v_mac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 // W32: [0xfa,0x04,0x0a,0x50,0x01,0x1b,0x04,0x00] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 // W32: [0xfa,0x04,0x0a,0x52,0x01,0x1b,0x04,0x00] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 // W32: [0xfa,0x04,0x0a,0x54,0x01,0x1b,0x04,0x00] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 // W64: [0xfa,0x04,0x0a,0x50,0x01,0x1b,0x04,0x00] -// W32-ERR: error: 
instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 // W64: [0xfa,0x04,0x0a,0x52,0x01,0x1b,0x04,0x00] -// W32-ERR: error: instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 // W64: [0xfa,0x04,0x0a,0x54,0x01,0x1b,0x04,0x00] -// W32-ERR: error: instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 // GFX10: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x04,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s index e6985532bd1a1..6a98669b170c9 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s @@ -506,19 +506,19 @@ v_ldexp_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 // W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] // W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05] -// W32-ERR: error: instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 // W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] -// W32-ERR: error: instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_cndmask_b32_dpp v0, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 // W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] @@ -526,27 +526,27 @@ v_cndmask_b32_dpp v0, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 v_add_co_ci_u32_dpp v0, vcc_lo, v0, v0, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: [0xe9,0x00,0x00,0x50,0x00,0x77,0x39,0x05] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_sub_co_ci_u32_dpp v0, vcc_lo, v0, v0, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:0 // W32: [0xe9,0x00,0x00,0x52,0x00,0x77,0x39,0x05] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_dpp v0, vcc_lo, v0, v0, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 // W32: [0xea,0x00,0x00,0x54,0x00,0x77,0x39,0x05] -// W64-ERR: error: instruction not supported on this GPU +// W64-ERR: error: operands are not valid for this GPU or mode v_add_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 // W64: [0xea,0x00,0x00,0x50,0x00,0x77,0x39,0x05] -// W32-ERR: error: instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_sub_co_ci_u32_dpp v0, vcc, 
v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 // W64: [0xea,0x00,0x00,0x52,0x00,0x77,0x39,0x05] -// W32-ERR: error: instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_dpp v0, vcc, v0, v0, vcc dpp8:[7,6,5,4,3,2,1,0] // W64: [0xe9,0x00,0x00,0x54,0x00,0x77,0x39,0x05] -// W32-ERR: error: instruction not supported on this GPU +// W32-ERR: error: operands are not valid for this GPU or mode v_add_nc_u32_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX10: [0xe9,0xfe,0x0b,0x4a,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s index f99a295362369..7269074c9f46a 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s @@ -1,38 +1,38 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOGFX10 --implicit-check-not=error: %s -; TODO: more helpful error message for missing dim operand +// TODO: more helpful error message for missing dim operand image_load v[0:3], v0, s[0:7] dmask:0xf unorm -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D da -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_load_pck v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D d16 -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image data size does not match dmask and tfe +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image data size does not match dmask and tfe image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 image_sample_d v[0:3], [v0, v1, v2, v3, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 image_sample_c_d v[0:3], [v0, v1, v2, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 
image_sample_c_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 image_load v[0:1], v0, s[0:7] dmask:0x9 dim:1 D -; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand diff --git a/llvm/test/MC/AMDGPU/lds_direct-err.s b/llvm/test/MC/AMDGPU/lds_direct-err.s index 3e5bfe48ca0a0..48314613b040c 100644 --- a/llvm/test/MC/AMDGPU/lds_direct-err.s +++ b/llvm/test/MC/AMDGPU/lds_direct-err.s @@ -58,7 +58,7 @@ v_subrev_u32 v0, src_lds_direct, v0 //---------------------------------------------------------------------------// v_writelane_b32 v0, lds_direct, s0 -// NOGFX9: error: instruction not supported on this GPU +// NOGFX9: error: operands are not valid for this GPU or mode //---------------------------------------------------------------------------// // lds_direct cannot be used with 64-bit and larger operands diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index f639fd9b19fac..7d33414c904b4 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -570,12 +570,14 @@ v_add_u16_sdwa v0, scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD sr // GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86] v_add_u16_sdwa v0, v0, scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICIVI: error: instruction not supported on this GPU // GFX9: v_add_u32_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x68] +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_add_u32 v0, execz, v0 -// NOSICIVI: error: instruction not supported on this GPU // GFX9: v_add_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00] +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_add_u32_e64 v0, scc, v0 // SICI: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0x44,0x7d] @@ -797,8 +799,9 @@ v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD // NOVI: error: register not available on this GPU v_add_u32 v0, private_base, s0 -// NOSICIVI: error: instruction not supported on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_add_u32 v0, scc, s0 // v_div_fmas implicitly reads VCC diff --git a/llvm/test/MC/AMDGPU/mtbuf.s b/llvm/test/MC/AMDGPU/mtbuf.s index a405a8824df4a..7ffa5e032ac7a 100644 --- a/llvm/test/MC/AMDGPU/mtbuf.s +++ b/llvm/test/MC/AMDGPU/mtbuf.s @@ -277,7 +277,7 @@ tbuffer_store_format_xyzw v[1:4], v[1:2], ttmp[4:7], s0, format:[BUF_DATA_FORMAT // Check addr64 tbuffer_store_format_xyzw v[1:4], v[1:2], ttmp[4:7], s0, format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] addr64 // SICI: tbuffer_store_format_xyzw v[1:4], v[1:2], ttmp[4:7], s0 format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] addr64 ; encoding: [0x00,0x80,0xa7,0xeb,0x01,0x01,0x1d,0x00] -// VI-ERR: error: instruction not supported on this GPU +// VI-ERR: error: operands are not valid for this GPU or mode 
//===----------------------------------------------------------------------===// // Tests for symbolic format errors handling diff --git a/llvm/test/MC/AMDGPU/mubuf.s b/llvm/test/MC/AMDGPU/mubuf.s index ad0b9396753c1..89a0e291a51e4 100644 --- a/llvm/test/MC/AMDGPU/mubuf.s +++ b/llvm/test/MC/AMDGPU/mubuf.s @@ -164,35 +164,35 @@ buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x30,0xe0,0x02,0x01,0x01,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x01,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0x01,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x41,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x81,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x30,0xe0,0x02,0x01,0x81,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe // SICI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xdd,0x71] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode //===----------------------------------------------------------------------===// // store - immediate offset only @@ -344,35 +344,35 @@ buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x70,0xe0,0x02,0x01,0x01,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x01,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: 
error: operands are not valid for this GPU or mode buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0x01,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x41,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x81,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x70,0xe0,0x02,0x01,0x81,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe // SICI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xdd,0x71] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode //===----------------------------------------------------------------------===// // Instructions @@ -513,23 +513,23 @@ buffer_wbinvl1_vol //===----------------------------------------------------------------------===// buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 ; encoding: [0x00,0x80,0xf0,0xe0,0x02,0x01,0x02,0xb8] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_atomic_inc v1, v[2:3], s[8:11], s4 addr64 // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], s4 addr64 ; encoding: [0x00,0x80,0xf0,0xe0,0x02,0x01,0x02,0x04] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 slc // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 slc ; encoding: [0x00,0x80,0xf0,0xe0,0x02,0x01,0x42,0xb8] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 ; encoding: [0x04,0x80,0xf0,0xe0,0x02,0x01,0x02,0xb8] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 slc // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 slc ; encoding: [0x04,0x80,0xf0,0xe0,0x02,0x01,0x42,0xb8] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or 
mode buffer_atomic_inc v1, off, s[8:11], 56 // SICI: buffer_atomic_inc v1, off, s[8:11], 56 ; encoding: [0x00,0x00,0xf0,0xe0,0x00,0x01,0x02,0xb8] @@ -613,23 +613,23 @@ buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 slc buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 glc // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 glc ; encoding: [0x00,0xc0,0xf0,0xe0,0x02,0x01,0x02,0xb8] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_atomic_inc v1, v[2:3], s[8:11], s4 addr64 glc // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], s4 addr64 glc ; encoding: [0x00,0xc0,0xf0,0xe0,0x02,0x01,0x02,0x04] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 glc slc // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 glc slc ; encoding: [0x00,0xc0,0xf0,0xe0,0x02,0x01,0x42,0xb8] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 glc // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 glc ; encoding: [0x04,0xc0,0xf0,0xe0,0x02,0x01,0x02,0xb8] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 glc slc // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 glc slc ; encoding: [0x04,0xc0,0xf0,0xe0,0x02,0x01,0x42,0xb8] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode buffer_atomic_inc v1, off, s[8:11], 56 glc // SICI: buffer_atomic_inc v1, off, s[8:11], 56 glc ; encoding: [0x00,0x40,0xf0,0xe0,0x00,0x01,0x02,0xb8] @@ -844,15 +844,15 @@ buffer_store_lds_dword s[4:7], s8 offset:4 lds glc slc // VI: buffer_store_lds_dword s[4:7], s8 offset:4 lds glc slc ; encoding: [0x04,0x40,0xf7,0xe0,0x00,0x00,0x01,0x08] buffer_load_dwordx2 v[1:2], off, s[4:7], s1 lds -// NOSICI: error: instruction not supported on this GPU +// NOSICI: error: operands are not valid for this GPU or mode // VI: buffer_load_dwordx2 v[1:2], off, s[4:7], s1 lds ; encoding: [0x00,0x00,0x55,0xe0,0x00,0x01,0x01,0x01] buffer_load_dwordx3 v[0:2], off, s[4:7], s0 offset:4095 lds -// NOSICI: error: instruction not supported on this GPU +// NOSICI: error: operands are not valid for this GPU or mode // VI: buffer_load_dwordx3 v[0:2], off, s[4:7], s0 offset:4095 lds ; encoding: [0xff,0x0f,0x59,0xe0,0x00,0x00,0x01,0x00] buffer_load_dwordx4 v[1:4], off, s[4:7], s1 lds -// NOSICI: error: instruction not supported on this GPU +// NOSICI: error: operands are not valid for this GPU or mode // VI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 lds ; encoding: [0x00,0x00,0x5d,0xe0,0x00,0x01,0x01,0x01] //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s index cbdcbc99cda9c..ff725dbba4132 100644 --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -1,4 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SICI -check-prefix=CI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SICI %s // 
RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s @@ -720,11 +721,11 @@ s_load_dword s1, s[2:3], -1 // NOVI: error: expected a 20-bit unsigned offset // GFX9: s_load_dword s1, s[2:3], -0x1 ; encoding: [0x41,0x00,0x02,0xc0,0xff,0xff,0x1f,0x00] // GFX10: s_load_dword s1, s[2:3], -0x1 ; encoding: [0x41,0x00,0x00,0xf4,0xff,0xff,0x1f,0xfa] -// NOSICI: error: instruction not supported on this GPU +// NOSICI: error: operands are not valid for this GPU or mode s_buffer_load_dword s10, s[92:95], -1 // NOVI: error: expected a 20-bit unsigned offset -// NOSICI: error: instruction not supported on this GPU +// NOSICI: error: operands are not valid for this GPU or mode // NOGFX9: error: expected a 20-bit unsigned offset s_atomic_swap s5, s[2:3], -1 @@ -761,13 +762,13 @@ s_buffer_store_dword s10, s[92:95], 0xFFFFFFFFFFF00000 // NOVI: error: expected a 20-bit unsigned offset s_load_dword s1, s[2:3], 0xFFFFFFFFFFF00000 -// NOSICI: error: instruction not supported on this GPU +// NOSICI: error: operands are not valid for this GPU or mode // GFX10: s_load_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x00,0xf4,0x00,0x00,0x10,0xfa] // GFX9: s_load_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x02,0xc0,0x00,0x00,0x10,0x00] // NOVI: error: expected a 20-bit unsigned offset s_buffer_load_dword s10, s[92:95], 0xFFFFFFFFFFF00000 -// NOSICI: error: instruction not supported on this GPU +// NOSICI: error: operands are not valid for this GPU or mode // NOGFX9: error: expected a 20-bit unsigned offset // NOVI: error: expected a 20-bit unsigned offset diff --git a/llvm/test/MC/AMDGPU/smrd.s b/llvm/test/MC/AMDGPU/smrd.s index 43819935afd02..66a465bd33ba8 100644 --- a/llvm/test/MC/AMDGPU/smrd.s +++ b/llvm/test/MC/AMDGPU/smrd.s @@ -20,22 +20,22 @@ s_load_dword s1, s[2:3], 0xff // VI: s_load_dword s1, s[2:3], 0xff ; encoding: [0x41,0x00,0x02,0xc0,0xff,0x00,0x00,0x00] s_load_dword s1, s[2:3], 0x100 -// NOSI: error: instruction not supported on this GPU +// NOSI: error: operands are not valid for this GPU or mode // CI: s_load_dword s1, s[2:3], 0x100 ; encoding: [0xff,0x82,0x00,0xc0,0x00,0x01,0x00,0x00] // VI: s_load_dword s1, s[2:3], 0x100 ; encoding: [0x41,0x00,0x02,0xc0,0x00,0x01,0x00,0x00] s_load_dword s1, s[2:3], 0xfffff -// NOSI: error: instruction not supported on this GPU +// NOSI: error: operands are not valid for this GPU or mode // CI: s_load_dword s1, s[2:3], 0xfffff ; encoding: [0xff,0x82,0x00,0xc0,0xff,0xff,0x0f,0x00] // VI: s_load_dword s1, s[2:3], 0xfffff ; encoding: [0x41,0x00,0x02,0xc0,0xff,0xff,0x0f,0x00] s_load_dword s1, s[2:3], 0x100000 -// NOSI: error: instruction not supported on this GPU +// NOSI: error: operands are not valid for this GPU or mode // CI: s_load_dword s1, s[2:3], 0x100000 ; encoding: [0xff,0x82,0x00,0xc0,0x00,0x00,0x10,0x00] // NOVI: error: expected a 20-bit unsigned offset s_load_dword s1, s[2:3], 0xffffffff -// NOSI: error: instruction not supported on this GPU +// NOSI: error: operands are not valid for this GPU or mode // CI: s_load_dword s1, s[2:3], 0xffffffff ; encoding: [0xff,0x82,0x00,0xc0,0xff,0xff,0xff,0xff] // NOVI: error: expected a 20-bit unsigned offset diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s index db93478476b80..11d4f68312ad3 100644 --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -143,7 +143,7 @@ 
v_writelane_b32 v1, s2, 4 v_writelane_b32 v2, 1, s4 // SICI: v_writelane_b32 v255, 0xaf123456, 2 ; encoding: [0xff,0x04,0xff,0x05,0x56,0x34,0x12,0xaf] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_writelane_b32 v255, 0xaf123456, 2 // SICI: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06] diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s index 5b1c7bdbaf133..2414bf68cc150 100644 --- a/llvm/test/MC/AMDGPU/vop3-gfx9.s +++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s @@ -1,4 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefixes=GCN,SI,SICI,SICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck -check-prefixes=GCN,CI,SICI,SICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefixes=GCN,VI,SICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX9 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=NOSI,NOSICI,NOGCN --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefixes=NOCI,NOSICI,NOGCN --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=NOVI,NOGCN --implicit-check-not=error: %s @@ -235,33 +238,37 @@ v_sub_i16 v5, v1, v2 clamp v_fma_f16_e64 v5, v1, v2, v3 // GFX9: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] v_fma_f16 v5, v1, -v2, v3 // GFX9: v_fma_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x44] // NOSICI: error: instruction not supported on this GPU +// VI: v_fma_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x44] v_fma_f16 v5, v1, v2, |v3| // GFX9: v_fma_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x06,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_fma_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0xee,0xd1,0x01,0x05,0x0e,0x04] v_fma_f16 v5, v1, v2, v3 clamp // GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x06,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x06,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x06,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid 
for this GPU or mode v_fma_legacy_f16_e64 v5, v1, v2, v3 // GFX9: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] @@ -282,41 +289,47 @@ v_fma_legacy_f16 v5, v1, v2, v3 clamp v_div_fixup_f16_e64 v5, 0.5, v2, v3 // GFX9: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] v_div_fixup_f16 v5, v1, 0.5, v3 // GFX9: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] v_div_fixup_f16 v5, v1, v2, 0.5 // GFX9: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03] // NOSICI: error: instruction not supported on this GPU +// VI: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] v_div_fixup_f16 v5, -v1, v2, v3 // GFX9: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0x24] // NOSICI: error: instruction not supported on this GPU +// VI: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24] v_div_fixup_f16 v5, |v1|, v2, v3 // GFX9: v_div_fixup_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0x07,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_div_fixup_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0xef,0xd1,0x01,0x05,0x0e,0x04] v_div_fixup_f16 v5, v1, v2, v3 clamp // GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x07,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x07,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x07,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_div_fixup_legacy_f16_e64 v5, 0.5, v2, v3 // GFX9: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] @@ -345,64 +358,72 @@ v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp v_mad_f16_e64 v5, 0.5, v2, v3 // GFX9: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] v_mad_f16 v5, v1, 0.5, v3 // GFX9: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_f16 
v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] v_mad_f16 v5, v1, v2, 0.5 // GFX9: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] v_mad_f16 v5, v1, v2, -v3 // GFX9: v_mad_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x84] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x84] v_mad_f16 v5, v1, v2, |v3| // GFX9: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x03,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0xea,0xd1,0x01,0x05,0x0e,0x04] v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x03,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x03,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x03,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x03,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x03,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_mad_f16 v5, v1, v2, v3 clamp // GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] v_mad_i16_e64 v5, 0, v2, v3 // GFX9: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04] v_mad_i16 v5, v1, -1, v3 // GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04] v_mad_i16 v5, v1, 
v2, -4.0 // NOGFX9: error: invalid literal operand @@ -412,16 +433,17 @@ v_mad_i16 v5, v1, v2, -4.0 v_mad_i16 v5, v1, v2, v3 clamp // GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04] v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x05,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x05,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_mad_legacy_f16_e64 v5, 0.5, v2, v3 // GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] @@ -482,10 +504,12 @@ v_mad_legacy_u16 v5, v1, v2, -4.0 clamp v_mad_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] v_mad_u16 v5, v1, -1, v3 // GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04] v_mad_u16 v5, v1, v2, -4.0 // NOGFX9: error: invalid literal operand @@ -495,45 +519,52 @@ v_mad_u16 v5, v1, v2, -4.0 v_mad_u16 v5, v1, v2, v3 clamp // GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0x0e,0x04] v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x04,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x04,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x04,0xd2,0x01,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode v_interp_p2_f16 v5, v2, attr0.x, v3 // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x04,0x0e,0x04] v_interp_p2_f16 v5, -v2, attr0.x, v3 // GFX9: v_interp_p2_f16 v5, -v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x44] // NOSICI: error: instruction not supported on this GPU +// 
VI: v_interp_p2_f16 v5, -v2, attr0.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x04,0x0e,0x44] v_interp_p2_f16 v5, v2, attr0.x, |v3| // GFX9: v_interp_p2_f16 v5, v2, attr0.x, |v3| ; encoding: [0x05,0x04,0x77,0xd2,0x00,0x04,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_interp_p2_f16 v5, v2, attr0.x, |v3| ; encoding: [0x05,0x04,0x76,0xd2,0x00,0x04,0x0e,0x04] v_interp_p2_f16 v5, v2, attr0.w, v3 // GFX9: v_interp_p2_f16 v5, v2, attr0.w, v3 ; encoding: [0x05,0x00,0x77,0xd2,0xc0,0x04,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_interp_p2_f16 v5, v2, attr0.w, v3 ; encoding: [0x05,0x00,0x76,0xd2,0xc0,0x04,0x0e,0x04] v_interp_p2_f16 v5, v2, attr0.x, v3 high // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 high ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x05,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_interp_p2_f16 v5, v2, attr0.x, v3 high ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x05,0x0e,0x04] v_interp_p2_f16 v5, v2, attr0.x, v3 clamp // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04] // NOSICI: error: instruction not supported on this GPU +// VI: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04] v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 // GFX9: v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x1f,0x04,0x0e,0x04] @@ -648,18 +679,22 @@ v_subbrev_co_u32 v84, vcc, v13, v31, vcc v_add_co_u32 v84, vcc, v13, v31 // GFX9: v_add_co_u32_e32 v84, vcc, v13, v31 ; encoding: [0x0d,0x3f,0xa8,0x32] // NOVI: error: instruction not supported on this GPU +// SICI: v_add_i32_e64 v84, vcc, v13, v31 ; encoding: [0x54,0x6a,0x4a,0xd2,0x0d,0x3f,0x02,0x00] v_sub_co_u32 v84, vcc, v13, v31 // GFX9: v_sub_co_u32_e32 v84, vcc, v13, v31 ; encoding: [0x0d,0x3f,0xa8,0x34] // NOVI: error: instruction not supported on this GPU +// SICI: v_sub_i32_e64 v84, vcc, v13, v31 ; encoding: [0x54,0x6a,0x4c,0xd2,0x0d,0x3f,0x02,0x00] v_subrev_co_u32 v84, vcc, v13, v31 // GFX9: v_subrev_co_u32_e32 v84, vcc, v13, v31 ; encoding: [0x0d,0x3f,0xa8,0x36] // NOVI: error: instruction not supported on this GPU +// SICI: v_subrev_i32_e64 v84, vcc, v13, v31 ; encoding: [0x54,0x6a,0x4e,0xd2,0x0d,0x3f,0x02,0x00] v_add_i32 v1, v2, v3 // GFX9: v_add_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x9c,0xd2,0x02,0x07,0x02,0x00] -// NOGCN: error: instruction not supported on this GPU +// NOSICI: error: operands are not valid for this GPU or mode +// NOVI: error: instruction not supported on this GPU v_add_i32 v1, v2, v3 clamp // GFX9: v_add_i32 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x9c,0xd2,0x02,0x07,0x02,0x00] @@ -668,7 +703,8 @@ v_add_i32 v1, v2, v3 clamp v_sub_i32 v1, v2, v3 // GFX9: v_sub_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x9d,0xd2,0x02,0x07,0x02,0x00] -// NOGCN: error: instruction not supported on this GPU +// NOSICI: error: operands are not valid for this GPU or mode +// NOVI: error: instruction not supported on this GPU v_sub_i32 v1, v2, v3 clamp // GFX9: v_sub_i32 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x9d,0xd2,0x02,0x07,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/vop3-literal.s b/llvm/test/MC/AMDGPU/vop3-literal.s index 43223108163a0..55f75fe2649db 100644 --- a/llvm/test/MC/AMDGPU/vop3-literal.s +++ b/llvm/test/MC/AMDGPU/vop3-literal.s @@ -273,43 +273,43 @@ v_cmp_f_u64_e64 s[10:11], 0x3f717273, 0x3f717273 v_cmpx_class_f32_e64 0xaf123456, v2 // GFX10: v_cmpx_class_f32_e64 0xaf123456, v2 ; encoding: 
[0x00,0x00,0x98,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] -// GFX9-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: operands are not valid for this GPU or mode v_cmpx_class_f32_e64 v1, 0xaf123456 // GFX10: v_cmpx_class_f32_e64 v1, 0xaf123456 ; encoding: [0x00,0x00,0x98,0xd4,0x01,0xff,0x01,0x00,0x56,0x34,0x12,0xaf] -// GFX9-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: operands are not valid for this GPU or mode v_cmpx_class_f32_e64 0xaf123456, 0xaf123456 // GFX10: v_cmpx_class_f32_e64 0xaf123456, 0xaf123456 ; encoding: [0x00,0x00,0x98,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -// GFX9-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: operands are not valid for this GPU or mode v_cmpx_class_f32_e64 0xaf123456, 0xaf123455 // GFX10-ERR: error: invalid literal operand -// GFX9-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: operands are not valid for this GPU or mode v_cmpx_lt_i16_e64 v1, 0x3456 // GFX10: v_cmpx_lt_i16_e64 v1, 0x3456 ; encoding: [0x00,0x00,0x99,0xd4,0x01,0xff,0x01,0x00,0x56,0x34,0x00,0x00] -// GFX9-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: operands are not valid for this GPU or mode v_cmpx_lt_i16_e64 0x3456, v2 // GFX10: v_cmpx_lt_i16_e64 0x3456, v2 ; encoding: [0x00,0x00,0x99,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00] -// GFX9-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: operands are not valid for this GPU or mode v_cmpx_lt_i16_e64 0x3456, 0x3456 // GFX10: v_cmpx_lt_i16_e64 0x3456, 0x3456 ; encoding: [0x00,0x00,0x99,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x00,0x00] -// GFX9-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: operands are not valid for this GPU or mode v_cmpx_f_i64_e64 0xaf123456, v[2:3] // GFX10: v_cmpx_f_i64_e64 0xaf123456, v[2:3] ; encoding: [0x00,0x00,0xb0,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] -// GFX9-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: operands are not valid for this GPU or mode v_cmpx_f_i64_e64 v[1:2], 0x3f717273 // GFX10: v_cmpx_f_i64_e64 v[1:2], 0x3f717273 ; encoding: [0x00,0x00,0xb0,0xd4,0x01,0xff,0x01,0x00,0x73,0x72,0x71,0x3f] -// GFX9-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: operands are not valid for this GPU or mode v_cmpx_f_i64_e64 0x3f717273, 0x3f717273 // GFX10: v_cmpx_f_i64_e64 0x3f717273, 0x3f717273 ; encoding: [0x00,0x00,0xb0,0xd4,0xff,0xfe,0x01,0x00,0x73,0x72,0x71,0x3f] -// GFX9-ERR: error: instruction not supported on this GPU +// GFX9-ERR: error: operands are not valid for this GPU or mode v_lshlrev_b64 v[5:6], 0xaf123456, v[2:3] // GFX10: v_lshlrev_b64 v[5:6], 0xaf123456, v[2:3] ; encoding: [0x05,0x00,0xff,0xd6,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/vop_sdwa.s b/llvm/test/MC/AMDGPU/vop_sdwa.s index 222e5dd4644b7..0878e59d89712 100644 --- a/llvm/test/MC/AMDGPU/vop_sdwa.s +++ b/llvm/test/MC/AMDGPU/vop_sdwa.s @@ -524,17 +524,17 @@ v_min_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_se v_ldexp_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: instruction not supported on this GPU +// NOGFX9: error: operands are not valid for this GPU or mode // VI: v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02] v_add_u32_sdwa v1, vcc, v2, v3 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: instruction not supported on this GPU +// NOGFX9: error: operands are not valid for this GPU or mode // VI: v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02] v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: instruction not supported on this GPU +// NOGFX9: error: operands are not valid for this GPU or mode // VI: v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02] v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 @@ -685,22 +685,22 @@ v_cmpx_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 // NOSICI: error: invalid operand for instruction // VI: v_mac_f32_sdwa v3, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x0a,0x06,0x2c,0x04,0x16,0x05,0x06] -// NOGFX9: error: instruction not supported on this GPU +// NOGFX9: error: operands are not valid for this GPU or mode v_mac_f32 v3, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 // NOSICI: error: invalid operand for instruction // VI: v_mac_f32_sdwa v15, v99, v194 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:WORD_0 src1_sel:DWORD ; encoding: [0xf9,0x84,0x1f,0x2c,0x63,0x0e,0x04,0x06] -// NOGFX9: error: instruction not supported on this GPU +// NOGFX9: error: operands are not valid for this GPU or mode v_mac_f32 v15, v99, v194 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:WORD_0 // NOSICI: error: invalid operand for instruction // NOVI: error: invalid operand for instruction -// NOGFX9: error: instruction not supported on this GPU +// NOGFX9: error: operands are not valid for this GPU or mode v_mac_f32 v194, v13, v1 dst_sel:BYTE_0 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 // NOSICI: error: instruction not supported on this GPU // VI: v_mac_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x46,0x02,0x06,0x05,0x02] -// NOGFX9: error: instruction not supported on this GPU +// NOGFX9: error: operands are not valid for this GPU or mode v_mac_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 //===----------------------------------------------------------------------===// @@ -763,12 +763,12 @@ v_cmp_eq_f32_sdwa vcc, v1, s22 src0_sel:WORD_1 src1_sel:BYTE_2 v_cmp_eq_f32_sdwa ttmp[12:13], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: sdwa variant of this instruction is not supported -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode // NOGFX9: error: register not available on this GPU v_cmp_eq_f32_sdwa tba, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: sdwa variant of this instruction is not supported -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode // NOGFX9: error: register not available on this GPU v_cmp_eq_f32_sdwa tma, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 @@ -1042,12 +1042,12 @@ v_mov_b32_sdwa v5, -17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
//===----------------------------------------------------------------------===// // NOSICI: error: sdwa variant of this instruction is not supported -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode // GFX9: v_cmp_eq_f32_sdwa s[2:3], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x82,0x05,0x02] v_cmp_eq_f32_sdwa s[2:3], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: sdwa variant of this instruction is not supported -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode // GFX9: v_cmp_eq_f32_sdwa exec, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0xfe,0x05,0x02] v_cmp_eq_f32_sdwa exec, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 @@ -1061,22 +1061,22 @@ v_cmp_eq_f32_sdwa exec, s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 //===----------------------------------------------------------------------===// // NOSICI: error: invalid operand for instruction -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode // GFX9: v_trunc_f32_sdwa v1, v2 mul:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x38,0x02,0x7e,0x02,0x50,0x06,0x00] v_trunc_f32 v1, v2 mul:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD // NOSICI: error: invalid operand for instruction -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode // GFX9: v_trunc_f32_sdwa v1, v2 clamp div:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x38,0x02,0x7e,0x02,0xf0,0x06,0x00] v_trunc_f32 v1, v2 clamp div:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD // NOSICI: error: invalid operand for instruction -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode // GFX9: v_add_f32_sdwa v0, v0, v0 mul:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x46,0x05,0x02] v_add_f32 v0, v0, v0 mul:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: invalid operand for instruction -// NOVI: error: instruction not supported on this GPU +// NOVI: error: operands are not valid for this GPU or mode // GFX9: v_add_f32_sdwa v0, v0, v0 clamp div:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0xe6,0x05,0x02] v_add_f32 v0, v0, v0 clamp div:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 diff --git a/llvm/test/MC/AMDGPU/wave32.s b/llvm/test/MC/AMDGPU/wave32.s index 2044f6ec3b481..0c9a3989e0a95 100644 --- a/llvm/test/MC/AMDGPU/wave32.s +++ b/llvm/test/MC/AMDGPU/wave32.s @@ -9,10 +9,10 @@ v_cmp_ge_i32_e32 s0, v0 v_cmp_ge_i32_e32 vcc_lo, s0, v1 // GFX1032: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i32_e32 vcc, s0, v2 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_cmp_ge_i32_e32 vcc, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d] v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD @@ -25,10 +25,10 @@ v_cmp_le_f16_sdwa s[0:1], v3, v4 
src0_sel:WORD_1 src1_sel:DWORD v_cmp_class_f32_e32 vcc_lo, s0, v0 // GFX1032: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_class_f32_e32 vcc, s0, v0 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] // TODO-GFX10: The following encoding does not match SP3's encoding, which is: @@ -40,7 +40,7 @@ v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD // TODO-GFX10: The following encoding does not match SP3's encoding, which is: // [0xf9,0x04,0x1e,0x7d,0x01,0x06,0x06,0x06] v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06] v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD @@ -57,10 +57,10 @@ v_cndmask_b32_e32 v1, v2, v3, v_cndmask_b32_e32 v1, v2, v3, vcc_lo // GFX1032: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cndmask_b32_e32 v1, v2, v3, vcc -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02] v_cndmask_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD @@ -69,10 +69,10 @@ v_cndmask_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD // GFX1032: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] v_cndmask_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 @@ -81,10 +81,10 @@ v_cndmask_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode 
v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] v_add_co_u32_e32 v2, vcc_lo, s0, v2 @@ -97,10 +97,10 @@ v_add_co_u32_e32 v2, vcc, s0, v2 v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo // GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50] v_add_co_ci_u32_e32 v3, v3, v4 @@ -125,10 +125,10 @@ v_subrev_co_u32_e32 v2, vcc, s0, v2 v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo // GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52] v_sub_co_ci_u32_e32 v3, v3, v4 @@ -137,10 +137,10 @@ v_sub_co_ci_u32_e32 v3, v3, v4 v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo // GFX1032: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54] v_subrev_co_ci_u32_e32 v1, 0, v1 @@ -161,10 +161,10 @@ v_add_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:BYTE_0 src1_sel:DWORD @@ -197,10 +197,10 @@ v_subrev_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWO v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -209,10 +209,10 @@ v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYT v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX1032: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -225,10 +225,10 @@ v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_ v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] v_add_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 @@ -249,10 +249,10 @@ v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] 
row_mask:0x0 bank_mask:0x0 // GFX1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] v_sub_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 @@ -269,10 +269,10 @@ v_sub_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00] v_subrev_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 @@ -289,10 +289,10 @@ v_subrev_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0 v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00] -// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode // GFX1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00] v_add_co_u32 v0, s0, v0, v2 From 55a60af237809308cf4731ec291cab32ea8c732a Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 5 Oct 2020 16:51:58 +0300 Subject: [PATCH 224/321] [llvm-readelf] - Implement --addrsig option. We have `--addrsig` implemented for `llvm-readobj`. It is usually convenient to use a single tool for dumping, so this patch implements `--addrsig` for `llvm-readelf` too. I've selected a simple output format, similar to the one used for dumping the symbol table.
It looks like: ``` Address-significant symbols section '.llvm_addrsig' contains 2 entries: Num: Name 1: foo 2: bar ``` Differential revision: https://reviews.llvm.org/D88835 --- llvm/test/tools/llvm-readobj/ELF/addrsig.test | 85 +++++++++++++------ .../test/tools/llvm-readobj/ELF/demangle.test | 15 +++- llvm/tools/llvm-readobj/ELFDumper.cpp | 85 +++++++++++++------ 3 files changed, 130 insertions(+), 55 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/addrsig.test b/llvm/test/tools/llvm-readobj/ELF/addrsig.test index 24621d80f79e6..2a4ea86795b15 100644 --- a/llvm/test/tools/llvm-readobj/ELF/addrsig.test +++ b/llvm/test/tools/llvm-readobj/ELF/addrsig.test @@ -1,15 +1,21 @@ ## Show that llvm-readobj can dump SHT_LLVM_ADDRSIG sections. # RUN: yaml2obj --docnum=1 %s -o %t1.o -# RUN: llvm-readobj --addrsig %t1.o | FileCheck -DFILE=%t1.o %s --check-prefix LLVM -# RUN: not llvm-readelf --addrsig %t1.o 2>&1 | FileCheck -DFILE=%t1.o %s --check-prefix GNU +# RUN: llvm-readobj --addrsig %t1.o | FileCheck -DFILE=%t1.o %s --check-prefix=LLVM +# RUN: llvm-readelf --addrsig %t1.o | \ +# RUN: FileCheck -DFILE=%t1.o %s --strict-whitespace --match-full-lines --check-prefix=GNU # LLVM: Addrsig [ # LLVM-NEXT: Sym: foo (1) # LLVM-NEXT: Sym: bar (2) # LLVM-NEXT: ] -# GNU: error: '[[FILE]]': --addrsig: not implemented +# GNU:Address-significant symbols section '.llvm_addrsig' contains 2 entries: +# GNU-NEXT: Num: Name +# GNU-NEXT: 1: foo +# GNU-NEXT: 2: bar +# GNU-EMPTY: +# GNU-NOT:{{.}} --- !ELF FileHeader: @@ -17,29 +23,48 @@ FileHeader: Data: ELFDATA2LSB Type: ET_DYN Sections: - - Name: .llvm_addrsig - Type: SHT_LLVM_ADDRSIG + - Name: .llvm_addrsig + ShName: [[NAME=]] + Type: SHT_LLVM_ADDRSIG Symbols: [ foo, bar ] Symbols: - Name: foo - Name: bar +## Check what we print when it is impossible to read the name of the SHT_LLVM_ADDRSIG section. +## llvm-readelf reports a warning in this case. + +# RUN: yaml2obj --docnum=1 -DNAME=0xff %s -o %t1.name.o +# RUN: llvm-readobj --addrsig %t1.name.o 2>&1 | \ +# RUN: FileCheck -DFILE=%t1.name.o %s --check-prefix=LLVM --implicit-check-not=warning: +# RUN: llvm-readelf --addrsig %t1.name.o 2>&1 | \ +# RUN: FileCheck -DFILE=%t1.name.o %s --check-prefix=NAME-GNU --implicit-check-not=warning: + +# NAME-GNU: warning: '[[FILE]]': unable to get the name of SHT_LLVM_ADDRSIG section with index 1: a section [index 1] has an invalid sh_name (0xff) offset which goes past the end of the section name string table +# NAME-GNU: Address-significant symbols section '' contains 2 entries: +# NAME-GNU-NEXT: Num: Name +# NAME-GNU-NEXT: 1: foo +# NAME-GNU-NEXT: 2: bar + ## Check that llvm-readobj dumps any SHT_LLVM_ADDRSIG section when --all ## is specified for LLVM style, but not for GNU style. -## TODO: Refine the llvm-readelf check when GNU-style dumping is implemented. -# RUN: llvm-readobj --all %t1.o | FileCheck %s --check-prefix LLVM -# RUN: llvm-readelf --all %t1.o 2>&1 | FileCheck %s --implicit-check-not=warning --implicit-check-not=error +# RUN: llvm-readobj --all %t1.o | FileCheck %s --check-prefix=LLVM +# RUN: llvm-readelf --all %t1.o | FileCheck %s --implicit-check-not="Address-significant" ## Check we report a warning when the content of the SHT_LLVM_ADDRSIG section ## is broken (e.g. contains a malformed uleb128). 
# RUN: yaml2obj --docnum=2 %s -o %t2.1.o -# RUN: llvm-readobj --addrsig %t2.1.o 2>&1 | FileCheck %s -DFILE=%t2.1.o --check-prefix=MALFORMED +# RUN: llvm-readobj --addrsig %t2.1.o 2>&1 | FileCheck %s -DFILE=%t2.1.o --check-prefix=MALFORMED-LLVM +# RUN: llvm-readelf --addrsig %t2.1.o 2>&1 | FileCheck %s -DFILE=%t2.1.o --check-prefix=MALFORMED-GNU + +# MALFORMED-LLVM: Addrsig [ +# MALFORMED-LLVM-NEXT: warning: '[[FILE]]': unable to decode SHT_LLVM_ADDRSIG section with index 1: malformed uleb128, extends past end +# MALFORMED-LLVM-NEXT: ] -# MALFORMED: Addrsig [ -# MALFORMED-NEXT: warning: '[[FILE]]': unable to decode SHT_LLVM_ADDRSIG section with index 1: malformed uleb128, extends past end -# MALFORMED-NEXT: ] +# MALFORMED-GNU: warning: '[[FILE]]': unable to decode SHT_LLVM_ADDRSIG section with index 1: malformed uleb128, extends past end +# MALFORMED-GNU-NOT:{{.}} --- !ELF FileHeader: @@ -55,24 +80,36 @@ Sections: ## Check we report a warning when the content of the SHT_LLVM_ADDRSIG section can't be read. # RUN: yaml2obj --docnum=2 -DOFFSET=0xffffffff %s -o %t2.2.o -# RUN: llvm-readobj --addrsig %t2.2.o 2>&1 | FileCheck %s -DFILE=%t2.2.o --check-prefix=BROKEN-SEC +# RUN: llvm-readobj --addrsig %t2.2.o 2>&1 | FileCheck %s -DFILE=%t2.2.o --check-prefix=BROKEN-SEC-LLVM +# RUN: llvm-readelf --addrsig %t2.2.o 2>&1 | FileCheck %s -DFILE=%t2.2.o --check-prefix=BROKEN-SEC-GNU + +# BROKEN-SEC-LLVM: Addrsig [ +# BROKEN-SEC-LLVM-NEXT: warning: '[[FILE]]': section [index 1] has a sh_offset (0xffffffff) + sh_size (0x1) that is greater than the file size (0x168) +# BROKEN-SEC-LLVM-NEXT: ] -# BROKEN-SEC: Addrsig [ -# BROKEN-SEC-NEXT: warning: '[[FILE]]': section [index 1] has a sh_offset (0xffffffff) + sh_size (0x1) that is greater than the file size (0x168) -# BROKEN-SEC-NEXT: ] +# BROKEN-SEC-GNU: warning: '[[FILE]]': section [index 1] has a sh_offset (0xffffffff) + sh_size (0x1) that is greater than the file size (0x168) +# BROKEN-SEC-GNU-NOT:{{.}} ## Check we report a warning when SHT_LLVM_ADDRSIG references a symbol that can't be ## dumped (e.g. the index value is larger than the number of symbols in .symtab). 
# RUN: yaml2obj --docnum=3 %s -o %t3.o -# RUN: llvm-readobj --addrsig %t3.o 2>&1 | FileCheck %s -DFILE=%t3.o --check-prefix=INVALID-INDEX - -# INVALID-INDEX: Addrsig [ -# INVALID-INDEX-NEXT: Sym: foo (1) -# INVALID-INDEX-NEXT: warning: '[[FILE]]': unable to read the name of symbol with index 255: unable to get symbol from section [index 2]: invalid symbol index (255) -# INVALID-INDEX-NEXT: Sym: (255) -# INVALID-INDEX-NEXT: Sym: bar (2) -# INVALID-INDEX-NEXT: ] +# RUN: llvm-readobj --addrsig %t3.o 2>&1 | FileCheck %s -DFILE=%t3.o --check-prefix=INVALID-INDEX-LLVM +# RUN: llvm-readelf --addrsig %t3.o 2>&1 | FileCheck %s -DFILE=%t3.o --check-prefix=INVALID-INDEX-GNU + +# INVALID-INDEX-LLVM: Addrsig [ +# INVALID-INDEX-LLVM-NEXT: Sym: foo (1) +# INVALID-INDEX-LLVM-NEXT: warning: '[[FILE]]': unable to read the name of symbol with index 255: unable to get symbol from section [index 2]: invalid symbol index (255) +# INVALID-INDEX-LLVM-NEXT: Sym: (255) +# INVALID-INDEX-LLVM-NEXT: Sym: bar (2) +# INVALID-INDEX-LLVM-NEXT: ] + +# INVALID-INDEX-GNU: Address-significant symbols section '.llvm_addrsig' contains 3 entries: +# INVALID-INDEX-GNU-NEXT: Num: Name +# INVALID-INDEX-GNU-NEXT: 1: foo +# INVALID-INDEX-GNU-NEXT: warning: '[[FILE]]': unable to read the name of symbol with index 255: unable to get symbol from section [index 2]: invalid symbol index (255) +# INVALID-INDEX-GNU-NEXT: 2: +# INVALID-INDEX-GNU-NEXT: 3: bar --- !ELF FileHeader: diff --git a/llvm/test/tools/llvm-readobj/ELF/demangle.test b/llvm/test/tools/llvm-readobj/ELF/demangle.test index 910b48f1c8e3a..94a77cc2a9828 100644 --- a/llvm/test/tools/llvm-readobj/ELF/demangle.test +++ b/llvm/test/tools/llvm-readobj/ELF/demangle.test @@ -68,17 +68,17 @@ ## Check GNU output style. # RUN: llvm-readelf --symbols --relocations --dyn-symbols --dyn-relocations \ -# RUN: --elf-section-groups --demangle %t.so > %t.gnu.long +# RUN: --elf-section-groups --addrsig --demangle %t.so > %t.gnu.long # RUN: llvm-readelf --symbols --relocations --dyn-symbols --dyn-relocations \ -# RUN: --elf-section-groups -C %t.so > %t.gnu.short +# RUN: --elf-section-groups --addrsig -C %t.so > %t.gnu.short # RUN: FileCheck %s --input-file %t.gnu.long --check-prefixes=GNU-COMMON,GNU-DEMANGLE # RUN: diff %t.gnu.long %t.gnu.short ## Check that default is no demangling. 
# RUN: llvm-readelf --symbols --relocations --dyn-symbols --dyn-relocations \ -# RUN: --elf-section-groups %t.so > %t.gnu.default +# RUN: --elf-section-groups --addrsig %t.so > %t.gnu.default # RUN: llvm-readelf --symbols --relocations --dyn-symbols --dyn-relocations \ -# RUN: --elf-section-groups --demangle=false %t.so > %t.gnu.nodemangle +# RUN: --elf-section-groups --addrsig --demangle=false %t.so > %t.gnu.nodemangle # RUN: FileCheck %s --input-file %t.gnu.default --check-prefixes=GNU-COMMON,GNU-MANGLED # RUN: diff %t.gnu.default %t.gnu.nodemangle @@ -110,6 +110,13 @@ # GNU-DEMANGLE-SAME: [foo(char)] # GNU-MANGLED-SAME: [_Z3fooc] +# GNU-COMMON: Address-significant symbols section '.llvm_addrsig' contains 2 entries: +# GNU-COMMON: Num: Name +# GNU-DEMANGLE-NEXT: 1: foo(char) +# GNU-DEMANGLE-NEXT: 2: blah(float) +# GNU-MANGLED-NEXT: 1: _Z3fooc +# GNU-MANGLED-NEXT: 2: _Z4blahf + !ELF FileHeader: Class: ELFCLASS64 diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index b04f993088145..9d7209efbabe9 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -4722,8 +4722,62 @@ template void GNUStyle::printCGProfile() { OS << "GNUStyle::printCGProfile not implemented\n"; } +static Expected> toULEB128Array(ArrayRef Data) { + std::vector Ret; + const uint8_t *Cur = Data.begin(); + const uint8_t *End = Data.end(); + while (Cur != End) { + unsigned Size; + const char *Err; + Ret.push_back(decodeULEB128(Cur, &Size, End, &Err)); + if (Err) + return createError(Err); + Cur += Size; + } + return Ret; +} + +template +static Expected> +decodeAddrsigSection(const ELFFile &Obj, const typename ELFT::Shdr &Sec) { + Expected> ContentsOrErr = Obj.getSectionContents(Sec); + if (!ContentsOrErr) + return ContentsOrErr.takeError(); + + if (Expected> SymsOrErr = + toULEB128Array(*ContentsOrErr)) + return *SymsOrErr; + else + return createError("unable to decode " + describe(Obj, Sec) + ": " + + toString(SymsOrErr.takeError())); +} + template void GNUStyle::printAddrsig() { - reportError(createError("--addrsig: not implemented"), this->FileName); + const Elf_Shdr *Sec = this->dumper().getDotAddrsigSec(); + if (!Sec) + return; + + Expected> SymsOrErr = + decodeAddrsigSection(this->Obj, *Sec); + if (!SymsOrErr) { + this->reportUniqueWarning(SymsOrErr.takeError()); + return; + } + + StringRef Name = this->getPrintableSectionName(*Sec); + OS << "\nAddress-significant symbols section '" << Name << "'" + << " contains " << SymsOrErr->size() << " entries:\n"; + OS << " Num: Name\n"; + + Field Fields[2] = {0, 8}; + size_t SymIndex = 0; + for (uint64_t Sym : *SymsOrErr) { + Fields[0].Str = to_string(format_decimal(++SymIndex, 6)) + ":"; + Fields[1].Str = this->dumper().getStaticSymbolName(Sym); + for (const Field &Entry : Fields) + printField(Entry); + OS << "\n"; + } } template @@ -6417,39 +6471,16 @@ template void LLVMStyle::printCGProfile() { } } -static Expected> toULEB128Array(ArrayRef Data) { - std::vector Ret; - const uint8_t *Cur = Data.begin(); - const uint8_t *End = Data.end(); - while (Cur != End) { - unsigned Size; - const char *Err; - Ret.push_back(decodeULEB128(Cur, &Size, End, &Err)); - if (Err) - return createError(Err); - Cur += Size; - } - return Ret; -} - template void LLVMStyle::printAddrsig() { ListScope L(W, "Addrsig"); const Elf_Shdr *Sec = this->dumper().getDotAddrsigSec(); if (!Sec) return; - Expected> ContentsOrErr = - this->Obj.getSectionContents(*Sec); - if (!ContentsOrErr) { - 
this->reportUniqueWarning(ContentsOrErr.takeError()); - return; - } - - Expected> SymsOrErr = toULEB128Array(*ContentsOrErr); + Expected> SymsOrErr = + decodeAddrsigSection(this->Obj, *Sec); if (!SymsOrErr) { - this->reportUniqueWarning(createError("unable to decode " + - describe(this->Obj, *Sec) + ": " + - toString(SymsOrErr.takeError()))); + this->reportUniqueWarning(SymsOrErr.takeError()); return; } From 82311766d993f730506cb82471d7349a380a5df7 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 6 Oct 2020 16:04:15 +0300 Subject: [PATCH 225/321] [obj2yaml] - Rename `Group` to `GroupSection`. NFC. The `Group` class represents a group section, but is named inconsistently with the other sections, which all have the "Section" suffix. This is sometimes confusing; this patch addresses the issue. Differential revision: https://reviews.llvm.org/D88892 --- llvm/include/llvm/ObjectYAML/ELFYAML.h | 4 ++-- llvm/lib/ObjectYAML/ELFEmitter.cpp | 7 ++++--- llvm/lib/ObjectYAML/ELFYAML.cpp | 6 +++--- llvm/tools/obj2yaml/elf2yaml.cpp | 9 +++++---- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index 17ba8f9fda219..0581b0b28b689 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -444,13 +444,13 @@ struct VerdefSection : Section { static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Verdef; } }; -struct Group : Section { +struct GroupSection : Section { // Members of a group contain a flag and a list of section indices // that are part of the group. std::vector Members; Optional Signature; /* Info */ - Group() : Section(ChunkKind::Group) {} + GroupSection() : Section(ChunkKind::Group) {} static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Group; } }; diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 10f31555005fb..559c45a56d71f 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -245,7 +245,8 @@ template class ELFState { void writeSectionContent(Elf_Shdr &SHeader, const ELFYAML::RelrSection &Section, ContiguousBlobAccumulator &CBA); - void writeSectionContent(Elf_Shdr &SHeader, const ELFYAML::Group &Group, + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::GroupSection &Group, ContiguousBlobAccumulator &CBA); void writeSectionContent(Elf_Shdr &SHeader, const ELFYAML::SymtabShndxSection &Shndx, @@ -697,7 +698,7 @@ void ELFState::initSectionHeaders(std::vector &SHeaders, writeSectionContent(SHeader, *S, CBA); } else if (auto S = dyn_cast(Sec)) { writeSectionContent(SHeader, *S, CBA); - } else if (auto S = dyn_cast(Sec)) { + } else if (auto S = dyn_cast(Sec)) { writeSectionContent(SHeader, *S, CBA); } else if (auto S = dyn_cast(Sec)) { writeSectionContent(SHeader, *S, CBA); @@ -1237,7 +1238,7 @@ void ELFState::writeSectionContent( template void ELFState::writeSectionContent(Elf_Shdr &SHeader, - const ELFYAML::Group &Section, + const ELFYAML::GroupSection &Section, ContiguousBlobAccumulator &CBA) { assert(Section.Type == llvm::ELF::SHT_GROUP && "Section type is not SHT_GROUP"); diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index a86172418c1e9..5e642d90ffc77 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1214,7 +1214,7 @@ static void sectionMapping(IO &IO, ELFYAML::RelrSection &Section) { IO.mapOptional("Content", Section.Content); } -static void groupSectionMapping(IO &IO,
ELFYAML::Group &Group) { +static void groupSectionMapping(IO &IO, ELFYAML::GroupSection &Group) { commonSectionMapping(IO, Group); IO.mapOptional("Info", Group.Signature); IO.mapRequired("Members", Group.Members); @@ -1353,8 +1353,8 @@ void MappingTraits>::mapping( break; case ELF::SHT_GROUP: if (!IO.outputting()) - Section.reset(new ELFYAML::Group()); - groupSectionMapping(IO, *cast(Section.get())); + Section.reset(new ELFYAML::GroupSection()); + groupSectionMapping(IO, *cast(Section.get())); break; case ELF::SHT_NOBITS: if (!IO.outputting()) diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index f5ad6a0a44997..2d09c34c093f5 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -94,7 +94,7 @@ class ELFDumper { Expected dumpVerdefSection(const Elf_Shdr *Shdr); Expected dumpSymverSection(const Elf_Shdr *Shdr); Expected dumpVerneedSection(const Elf_Shdr *Shdr); - Expected dumpGroup(const Elf_Shdr *Shdr); + Expected dumpGroupSection(const Elf_Shdr *Shdr); Expected dumpARMIndexTableSection(const Elf_Shdr *Shdr); Expected dumpMipsABIFlags(const Elf_Shdr *Shdr); @@ -480,7 +480,7 @@ ELFDumper::dumpSections() { case ELF::SHT_RELR: return [this](const Elf_Shdr *S) { return dumpRelrSection(S); }; case ELF::SHT_GROUP: - return [this](const Elf_Shdr *S) { return dumpGroup(S); }; + return [this](const Elf_Shdr *S) { return dumpGroupSection(S); }; case ELF::SHT_NOBITS: return [this](const Elf_Shdr *S) { return dumpNoBitsSection(S); }; case ELF::SHT_NOTE: @@ -1323,8 +1323,9 @@ Expected ELFDumper::getSymbolName(uint32_t SymtabNdx, } template -Expected ELFDumper::dumpGroup(const Elf_Shdr *Shdr) { - auto S = std::make_unique(); +Expected +ELFDumper::dumpGroupSection(const Elf_Shdr *Shdr) { + auto S = std::make_unique(); if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); From 149dc94c1d52c5f78e2aadb57a72dd437fe55aa1 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Wed, 7 Oct 2020 16:18:46 +0200 Subject: [PATCH 226/321] [mlir] fix the types used during the generation of the kernel param array The patch changes the types used to access the elements of the kernel parameter structure: each field is now accessed through a pointer to the actual parameter type instead of a pointer to the structure itself.
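For illustration, the typing rule the patch restores for the generated GEPs mirrors ordinary C++ field access: the address of a struct field has the field's pointer type, not the enclosing struct's pointer type. A minimal sketch of the analogy (the struct and field names here are hypothetical, not taken from the patch):

```cpp
// field_ptr.cpp - the address of a field carries the field's own type.
struct KernelArgs {
  int n;
  float *data;
};

int main() {
  KernelArgs args{0, nullptr};
  // OK: &args.data points at the field, so its type is float **.
  float **fieldPtr = &args.data;
  // The old lowering used the analogue of the line below, which does not
  // type-check: a pointer to a field is not a pointer to the struct.
  // KernelArgs *bad = &args.data; // error: cannot convert float ** to KernelArgs *
  (void)fieldPtr;
  return 0;
}
```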
Reviewed By: csigg Differential Revision: https://reviews.llvm.org/D88959 --- .../GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index 56dc7d3f7c622..b25c2643f52ed 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -279,9 +279,9 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( for (auto en : llvm::enumerate(arguments)) { auto index = builder.create( loc, llvmInt32Type, builder.getI32IntegerAttr(en.index())); - auto fieldPtr = - builder.create(loc, structType.getPointerTo(), structPtr, - ArrayRef{zero, index.getResult()}); + auto fieldPtr = builder.create( + loc, argumentTypes[en.index()].getPointerTo(), structPtr, + ArrayRef{zero, index.getResult()}); builder.create(loc, en.value(), fieldPtr); auto elementPtr = builder.create(loc, llvmPointerPointerType, arrayPtr, index.getResult()); From 4cae6228d129d4c4dfb156c043977bb6b5690031 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 6 Oct 2020 16:35:35 +0200 Subject: [PATCH 227/321] [ADT] function_ref's constructor is unavailable if the argument is not callable. This allows overload sets containing function_ref arguments to work correctly. Otherwise they're ambiguous, as anything "could be" converted to a function_ref. This matches the proposed std::function_ref, absl::function_ref, etc. Differential Revision: https://reviews.llvm.org/D88901 --- llvm/include/llvm/ADT/STLExtras.h | 13 ++++++++++++- llvm/lib/AsmParser/LLParser.h | 4 ++-- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 4 +--- llvm/unittests/ADT/FunctionRefTest.cpp | 11 +++++++++++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 4be016b795a0f..b70a6f9fc381f 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -186,16 +186,27 @@ class function_ref { std::forward(params)...); } + template ::type> + static constexpr bool IsCompatible = + std::is_void::value || std::is_convertible::value; + public: function_ref() = default; function_ref(std::nullptr_t) {} template + // Only allow this constructor if the object is actually callable + // and returns the correct type. function_ref( Callable &&callable, std::enable_if_t< + // This is not the copy-constructor. !std::is_same>, - function_ref>::value> * = nullptr) + function_ref>::value && + // Must be callable and return a suitable type.
+ IsCompatible> * = nullptr) : callback(callback_fn::type>), callable(reinterpret_cast(&callable)) {} diff --git a/llvm/lib/AsmParser/LLParser.h b/llvm/lib/AsmParser/LLParser.h index a7fbcdd5abc54..5f581f0d4efb5 100644 --- a/llvm/lib/AsmParser/LLParser.h +++ b/llvm/lib/AsmParser/LLParser.h @@ -166,8 +166,8 @@ namespace llvm { : Context(Context), Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots), BlockAddressPFS(nullptr) {} bool Run( - bool UpgradeDebugInfo, - DataLayoutCallbackTy DataLayoutCallback = [](Module *) {}); + bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback = + [](StringRef) { return None; }); bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots); diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 4d69dd7dcc5d6..15ca3a54da2d3 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -579,9 +579,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { /// \returns true if an error occurred. Error parseBitcodeInto( Module *M, bool ShouldLazyLoadMetadata = false, bool IsImporting = false, - DataLayoutCallbackTy DataLayoutCallback = [](std::string) { - return None; - }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); static uint64_t decodeSignRotatedValue(uint64_t V); diff --git a/llvm/unittests/ADT/FunctionRefTest.cpp b/llvm/unittests/ADT/FunctionRefTest.cpp index 669b87dbf8e41..f084aa7a660b4 100644 --- a/llvm/unittests/ADT/FunctionRefTest.cpp +++ b/llvm/unittests/ADT/FunctionRefTest.cpp @@ -48,4 +48,15 @@ TEST(FunctionRefTest, BadCopy) { ASSERT_EQ(1, X()); } +// Test that overloads on function_refs are resolved as expected. +const char *returns(StringRef) { return "not a function"; } +const char *returns(function_ref F) { return "number"; } +const char *returns(function_ref F) { return "string"; } + +TEST(FunctionRefTest, SFINAE) { + EXPECT_EQ("not a function", returns("boo!")); + EXPECT_EQ("number", returns([] { return 42; })); + EXPECT_EQ("string", returns([] { return "hello"; })); +} + } // namespace From fbce456fad35efa857b9dea2cff3a938835de44d Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 7 Oct 2020 10:33:40 -0400 Subject: [PATCH 228/321] [gn build] (manually) port ce1365f8f7e --- libcxx/src/CMakeLists.txt | 4 +++- llvm/utils/gn/secondary/libcxx/src/BUILD.gn | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 97e6e226b1ac3..5de4a513ac170 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -56,7 +56,9 @@ set(LIBCXX_SOURCES ) if (LIBCXX_ENABLE_DEBUG_MODE) - list(APPEND LIBCXX_SOURCES debug.cpp) + list(APPEND LIBCXX_SOURCES + debug.cpp + ) endif() if(WIN32) diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn index 6dccffa7aa34d..71668b65f72f5 100644 --- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn @@ -2,6 +2,9 @@ import("//clang/runtimes.gni") import("//llvm/utils/gn/build/symlink_or_copy.gni") declare_args() { + # Whether to support libc++ opt-in debug mode via _LIBCPP_DEBUG. + libcxx_enable_debug_mode = true + # Build libc++ with definitions for operator new/delete. 
libcxx_enable_new_delete_definitions = true @@ -115,7 +118,6 @@ cxx_sources = [ "chrono.cpp", "condition_variable.cpp", "condition_variable_destructor.cpp", - "debug.cpp", "exception.cpp", "functional.cpp", "future.cpp", @@ -169,6 +171,9 @@ if (target_os == "win") { if (target_os == "solaris") { cxx_sources += [ "support/solaris/xlocale.cpp" ] } +if (libcxx_enable_debug_mode) { + cxx_sources += [ "debug.cpp" ] +} if (libcxx_enable_filesystem) { cxx_sources += [ "filesystem/directory_iterator.cpp", From 281703e67ffaee8e26efef86e0df3e145477f4cb Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 7 Oct 2020 16:36:38 +0200 Subject: [PATCH 229/321] Revert "[ADT] function_ref's constructor is unavailable if the argument is not callable." This reverts commit 4cae6228d129d4c4dfb156c043977bb6b5690031. Breaks GCC build: http://lab.llvm.org:8011/#/builders/8/builds/33/steps/6/logs/stdio --- llvm/include/llvm/ADT/STLExtras.h | 13 +------------ llvm/lib/AsmParser/LLParser.h | 4 ++-- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 4 +++- llvm/unittests/ADT/FunctionRefTest.cpp | 11 ----------- 4 files changed, 6 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index b70a6f9fc381f..4be016b795a0f 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -186,27 +186,16 @@ class function_ref { std::forward(params)...); } - template ::type> - static constexpr bool IsCompatible = - std::is_void::value || std::is_convertible::value; - public: function_ref() = default; function_ref(std::nullptr_t) {} template - // Only allow this constructor if the object is actually callable - // and returns the correct type. function_ref( Callable &&callable, std::enable_if_t< - // This is not the copy-constructor. !std::is_same>, - function_ref>::value && - // Must be callable and return a suitable type. - IsCompatible> * = nullptr) + function_ref>::value> * = nullptr) : callback(callback_fn::type>), callable(reinterpret_cast(&callable)) {} diff --git a/llvm/lib/AsmParser/LLParser.h b/llvm/lib/AsmParser/LLParser.h index 5f581f0d4efb5..a7fbcdd5abc54 100644 --- a/llvm/lib/AsmParser/LLParser.h +++ b/llvm/lib/AsmParser/LLParser.h @@ -166,8 +166,8 @@ namespace llvm { : Context(Context), Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots), BlockAddressPFS(nullptr) {} bool Run( - bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback = - [](StringRef) { return None; }); + bool UpgradeDebugInfo, + DataLayoutCallbackTy DataLayoutCallback = [](Module *) {}); bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots); diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 15ca3a54da2d3..4d69dd7dcc5d6 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -579,7 +579,9 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { /// \returns true if an error occurred. 
Error parseBitcodeInto( Module *M, bool ShouldLazyLoadMetadata = false, bool IsImporting = false, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + DataLayoutCallbackTy DataLayoutCallback = [](std::string) { + return None; + }); static uint64_t decodeSignRotatedValue(uint64_t V); diff --git a/llvm/unittests/ADT/FunctionRefTest.cpp b/llvm/unittests/ADT/FunctionRefTest.cpp index f084aa7a660b4..669b87dbf8e41 100644 --- a/llvm/unittests/ADT/FunctionRefTest.cpp +++ b/llvm/unittests/ADT/FunctionRefTest.cpp @@ -48,15 +48,4 @@ TEST(FunctionRefTest, BadCopy) { ASSERT_EQ(1, X()); } -// Test that overloads on function_refs are resolved as expected. -const char *returns(StringRef) { return "not a function"; } -const char *returns(function_ref F) { return "number"; } -const char *returns(function_ref F) { return "string"; } - -TEST(FunctionRefTest, SFINAE) { - EXPECT_EQ("not a function", returns("boo!")); - EXPECT_EQ("number", returns([] { return 42; })); - EXPECT_EQ("string", returns([] { return "hello"; })); -} - } // namespace From fc819b6925611b6c564daf6752af2e88ce12f5c0 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 7 Oct 2020 15:48:01 +0100 Subject: [PATCH 230/321] [AMDGPU] Use @LINE for error checking in gfx10.3 assembler tests --- llvm/test/MC/AMDGPU/gfx1030_err.s | 92 +++++++++++++++---------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s index f7134a30c6f8e..246548f166839 100644 --- a/llvm/test/MC/AMDGPU/gfx1030_err.s +++ b/llvm/test/MC/AMDGPU/gfx1030_err.s @@ -2,139 +2,139 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1031 %s 2>&1 | FileCheck --check-prefix=GFX10 --implicit-check-not=error: %s v_dot8c_i32_i4 v5, v1, v2 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU s_get_waveid_in_workgroup s0 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU s_memtime s[0:1] -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) -// GFX10: error: specified hardware register is not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: specified hardware register is not supported on this GPU v_mac_f32 v0, v1, v2 -// GFX10: error: operands are not valid for this GPU or mode +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_mad_f32 v0, v1, v2, v3 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction 
not supported on this GPU v_madak_f32 v0, v1, v2, 1 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_madmk_f32 v0, v1, 1, v2 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mad_legacy_f32 v0, v1, v2, v3 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mac_legacy_f32 v0, v1, v2 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_add_src2_u32 v1 offset:65535 gds -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_add_src2_u32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_add_src2_f32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_sub_src2_u32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_rsub_src2_u32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_inc_src2_u32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_dec_src2_u32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_min_src2_i32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_max_src2_i32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_min_src2_u32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_max_src2_u32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_and_src2_b32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_or_src2_b32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_xor_src2_b32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_min_src2_f32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_max_src2_f32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_add_src2_u64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on 
this GPU ds_sub_src2_u64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_rsub_src2_u64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_inc_src2_u64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_dec_src2_u64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_min_src2_i64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_max_src2_i64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_min_src2_u64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_max_src2_u64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_and_src2_b64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_or_src2_b64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_xor_src2_b64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_min_src2_f64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_max_src2_f64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_write_src2_b32 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_write_src2_b64 v1 offset:65535 -// GFX10: error: instruction not supported on this GPU +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU From 333b2ab60b61afb342a8d271477849fb445a26af Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Wed, 7 Oct 2020 09:26:53 -0500 Subject: [PATCH 231/321] [SVE] Lower fixed length VECREDUCE_OR operation Differential Revision: https://reviews.llvm.org/D88847 --- .../Target/AArch64/AArch64ISelLowering.cpp | 12 + .../AArch64/sve-fixed-length-log-reduce.ll | 351 ++++++++++++++++++ 2 files changed, 363 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 308628a7f8348..c2972ab4f2f7c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1121,6 +1121,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_AND, MVT::v2i32, Custom); setOperationAction(ISD::VECREDUCE_AND, MVT::v4i32, Custom); setOperationAction(ISD::VECREDUCE_AND, MVT::v2i64, Custom); + setOperationAction(ISD::VECREDUCE_OR, MVT::v8i8, 
Custom); + setOperationAction(ISD::VECREDUCE_OR, MVT::v16i8, Custom); + setOperationAction(ISD::VECREDUCE_OR, MVT::v4i16, Custom); + setOperationAction(ISD::VECREDUCE_OR, MVT::v8i16, Custom); + setOperationAction(ISD::VECREDUCE_OR, MVT::v2i32, Custom); + setOperationAction(ISD::VECREDUCE_OR, MVT::v4i32, Custom); + setOperationAction(ISD::VECREDUCE_OR, MVT::v2i64, Custom); setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom); setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom); setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom); @@ -1263,6 +1270,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_OR, VT, Custom); setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); @@ -3944,6 +3952,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerSTORE(Op, DAG); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: case ISD::VECREDUCE_SMAX: case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: @@ -9732,6 +9741,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, // Try to lower fixed length reductions to SVE. EVT SrcVT = Src.getValueType(); bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND || + Op.getOpcode() == ISD::VECREDUCE_OR || (Op.getOpcode() != ISD::VECREDUCE_ADD && SrcVT.getVectorElementType() == MVT::i64); if (useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) { @@ -9740,6 +9750,8 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, return LowerFixedLengthReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG); case ISD::VECREDUCE_AND: return LowerFixedLengthReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG); + case ISD::VECREDUCE_OR: + return LowerFixedLengthReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG); case ISD::VECREDUCE_SMAX: return LowerFixedLengthReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG); case ISD::VECREDUCE_SMIN: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll index b95564cbc1e5e..efab691b4a9f4 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll @@ -343,6 +343,329 @@ define i64 @andv_v32i64(<32 x i64>* %a) #0 { ret i64 %res } +; +; ORV +; + +; No single instruction NEON ORV support. Use SVE. +define i8 @orv_v8i8(<8 x i8> %a) #0 { +; CHECK-LABEL: orv_v8i8: +; CHECK: ptrue [[PG:p[0-9]+]].b, vl8 +; CHECK: orv b[[REDUCE:[0-9]+]], [[PG]], z0.b +; CHECK: fmov w0, s[[REDUCE]] +; CHECK: ret + %res = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %a) + ret i8 %res +} + +; No single instruction NEON ORV support. Use SVE. 
+define i8 @orv_v16i8(<16 x i8> %a) #0 { +; CHECK-LABEL: orv_v16i8: +; CHECK: ptrue [[PG:p[0-9]+]].b, vl16 +; CHECK: orv b[[REDUCE:[0-9]+]], [[PG]], z0.b +; CHECK: fmov w0, s[[REDUCE]] +; CHECK: ret + %res = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %a) + ret i8 %res +} + +define i8 @orv_v32i8(<32 x i8>* %a) #0 { +; CHECK-LABEL: orv_v32i8: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_GE_256-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: orv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b +; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_256-NEXT: ret + %op = load <32 x i8>, <32 x i8>* %a + %res = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %op) + ret i8 %res +} + +define i8 @orv_v64i8(<64 x i8>* %a) #0 { +; CHECK-LABEL: orv_v64i8: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64 +; VBITS_GE_512-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: orv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b +; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: orv b[[REDUCE:[0-9]+]], [[PG]], [[OR]].b +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret + + %op = load <64 x i8>, <64 x i8>* %a + %res = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> %op) + ret i8 %res +} + +define i8 @orv_v128i8(<128 x i8>* %a) #0 { +; CHECK-LABEL: orv_v128i8: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128 +; VBITS_GE_1024-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: orv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b +; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_1024-NEXT: ret + %op = load <128 x i8>, <128 x i8>* %a + %res = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %op) + ret i8 %res +} + +define i8 @orv_v256i8(<256 x i8>* %a) #0 { +; CHECK-LABEL: orv_v256i8: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256 +; VBITS_GE_2048-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: orv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b +; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_2048-NEXT: ret + %op = load <256 x i8>, <256 x i8>* %a + %res = call i8 @llvm.experimental.vector.reduce.or.v256i8(<256 x i8> %op) + ret i8 %res +} + +; No single instruction NEON ORV support. Use SVE. +define i16 @orv_v4i16(<4 x i16> %a) #0 { +; CHECK-LABEL: orv_v4i16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl4 +; CHECK: orv h[[REDUCE:[0-9]+]], [[PG]], z0.h +; CHECK: fmov w0, s[[REDUCE]] +; CHECK: ret + %res = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %a) + ret i16 %res +} + +; No single instruction NEON ORV support. Use SVE. 
+define i16 @orv_v8i16(<8 x i16> %a) #0 { +; CHECK-LABEL: orv_v8i16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl8 +; CHECK: orv h[[REDUCE:[0-9]+]], [[PG]], z0.h +; CHECK: fmov w0, s[[REDUCE]] +; CHECK: ret + %res = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %a) + ret i16 %res +} + +define i16 @orv_v16i16(<16 x i16>* %a) #0 { +; CHECK-LABEL: orv_v16i16: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_GE_256-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: orv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_256-NEXT: ret + %op = load <16 x i16>, <16 x i16>* %a + %res = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %op) + ret i16 %res +} + +define i16 @orv_v32i16(<32 x i16>* %a) #0 { +; CHECK-LABEL: orv_v32i16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: orv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: orv h[[REDUCE:[0-9]+]], [[PG]], [[OR]].h +; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] +; VBITS_EQ_256-NEXT: ret + %op = load <32 x i16>, <32 x i16>* %a + %res = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> %op) + ret i16 %res +} + +define i16 @orv_v64i16(<64 x i16>* %a) #0 { +; CHECK-LABEL: orv_v64i16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: orv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_1024-NEXT: ret + %op = load <64 x i16>, <64 x i16>* %a + %res = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> %op) + ret i16 %res +} + +define i16 @orv_v128i16(<128 x i16>* %a) #0 { +; CHECK-LABEL: orv_v128i16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: orv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_2048-NEXT: ret + %op = load <128 x i16>, <128 x i16>* %a + %res = call i16 @llvm.experimental.vector.reduce.or.v128i16(<128 x i16> %op) + ret i16 %res +} + +; No single instruction NEON ORV support. Use SVE. +define i32 @orv_v2i32(<2 x i32> %a) #0 { +; CHECK-LABEL: orv_v2i32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl2 +; CHECK: orv [[REDUCE:s[0-9]+]], [[PG]], z0.s +; CHECK: fmov w0, [[REDUCE]] +; CHECK: ret + %res = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %a) + ret i32 %res +} + +; No single instruction NEON ORV support. Use SVE. 
+define i32 @orv_v4i32(<4 x i32> %a) #0 { +; CHECK-LABEL: orv_v4i32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl4 +; CHECK: orv [[REDUCE:s[0-9]+]], [[PG]], z0.s +; CHECK: fmov w0, [[REDUCE]] +; CHECK: ret + %res = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a) + ret i32 %res +} + +define i32 @orv_v8i32(<8 x i32>* %a) #0 { +; CHECK-LABEL: orv_v8i32: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_GE_256-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: orv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_256-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_256-NEXT: ret + %op = load <8 x i32>, <8 x i32>* %a + %res = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %op) + ret i32 %res +} + +define i32 @orv_v16i32(<16 x i32>* %a) #0 { +; CHECK-LABEL: orv_v16i32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: orv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: orv [[REDUCE:s[0-9]+]], [[PG]], [[OR]].s +; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret + %op = load <16 x i32>, <16 x i32>* %a + %res = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> %op) + ret i32 %res +} + +define i32 @orv_v32i32(<32 x i32>* %a) #0 { +; CHECK-LABEL: orv_v32i32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: orv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret + %op = load <32 x i32>, <32 x i32>* %a + %res = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> %op) + ret i32 %res +} + +define i32 @orv_v64i32(<64 x i32>* %a) #0 { +; CHECK-LABEL: orv_v64i32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: orv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret + %op = load <64 x i32>, <64 x i32>* %a + %res = call i32 @llvm.experimental.vector.reduce.or.v64i32(<64 x i32> %op) + ret i32 %res +} + +; Nothing to do for single element vectors. 
+define i64 @orv_v1i64(<1 x i64> %a) #0 { +; CHECK-LABEL: orv_v1i64: +; CHECK: fmov x0, d0 +; CHECK: ret + %res = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> %a) + ret i64 %res +} + +; Use SVE for 128-bit vectors +define i64 @orv_v2i64(<2 x i64> %a) #0 { +; CHECK-LABEL: orv_v2i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl2 +; CHECK: orv [[REDUCE:d[0-9]+]], [[PG]], z0.d +; CHECK: fmov x0, [[REDUCE]] +; CHECK: ret + %res = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a) + ret i64 %res +} + +define i64 @orv_v4i64(<4 x i64>* %a) #0 { +; CHECK-LABEL: orv_v4i64: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_GE_256-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: orv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_256-NEXT: ret + %op = load <4 x i64>, <4 x i64>* %a + %res = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %op) + ret i64 %res +} + +define i64 @orv_v8i64(<8 x i64>* %a) #0 { +; CHECK-LABEL: orv_v8i64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: orv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: orv [[REDUCE:d[0-9]+]], [[PG]], [[OR]].d +; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] +; VBITS_EQ_256-NEXT: ret + %op = load <8 x i64>, <8 x i64>* %a + %res = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> %op) + ret i64 %res +} + +define i64 @orv_v16i64(<16 x i64>* %a) #0 { +; CHECK-LABEL: orv_v16i64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: orv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret + %op = load <16 x i64>, <16 x i64>* %a + %res = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> %op) + ret i64 %res +} + +define i64 @orv_v32i64(<32 x i64>* %a) #0 { +; CHECK-LABEL: orv_v32i64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: orv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret + %op = load <32 x i64>, <32 x i64>* %a + %res = call i64 @llvm.experimental.vector.reduce.or.v32i64(<32 x i64> %op) + ret i64 %res +} + attributes #0 = { "target-features"="+sve" } declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>) @@ -372,3 +695,31 @@ declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>) declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>) declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>) declare i64 @llvm.experimental.vector.reduce.and.v32i64(<32 x i64>) + +declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>) +declare i8 
@llvm.experimental.vector.reduce.or.v256i8(<256 x i8>) + +declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v128i16(<128 x i16>) + +declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v64i32(<64 x i32>) + +declare i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v32i64(<32 x i64>) From 528057c19755ad842052fba3a42dcbf7deafc6de Mon Sep 17 00:00:00 2001 From: Ronak Chauhan Date: Wed, 7 Oct 2020 08:14:46 +0530 Subject: [PATCH 232/321] [AMDGPU] Support disassembly for AMDGPU kernel descriptors Decode AMDGPU Kernel descriptors as assembler directives. Reviewed By: scott.linder, jhenderson, kzhuravl Differential Revision: https://reviews.llvm.org/D80713 --- D80713.diff | 848 ++++++++++++++++++ .../llvm/Support/AMDHSAKernelDescriptor.h | 70 +- .../Disassembler/AMDGPUDisassembler.cpp | 345 +++++++ .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 30 +- llvm/test/CodeGen/AMDGPU/nop-data.ll | 4 +- .../llvm-objdump/ELF/AMDGPU/kd-failure.s | 37 + .../tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s | 49 + .../tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s | 36 + .../llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s | 58 ++ .../llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s | 53 ++ .../llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s | 41 + llvm/tools/llvm-objdump/llvm-objdump.cpp | 17 - 12 files changed, 1538 insertions(+), 50 deletions(-) create mode 100644 D80713.diff create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s diff --git a/D80713.diff b/D80713.diff new file mode 100644 index 0000000000000..e51f4e02ab783 --- /dev/null +++ b/D80713.diff @@ -0,0 +1,848 @@ +diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h ++++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +@@ -162,39 +162,49 @@ + uint8_t reserved2[6]; + }; + ++enum : uint32_t { ++ GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0, ++ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4, ++ RESERVED0_OFFSET = 8, ++ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, ++ RESERVED1_OFFSET = 24, ++ COMPUTE_PGM_RSRC3_OFFSET = 44, ++ COMPUTE_PGM_RSRC1_OFFSET = 48, ++ COMPUTE_PGM_RSRC2_OFFSET = 52, ++ KERNEL_CODE_PROPERTIES_OFFSET = 56, ++ 
RESERVED2_OFFSET = 58, ++}; ++ + static_assert( + sizeof(kernel_descriptor_t) == 64, + "invalid size for kernel_descriptor_t"); +-static_assert( +- offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0, +- "invalid offset for group_segment_fixed_size"); +-static_assert( +- offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4, +- "invalid offset for private_segment_fixed_size"); +-static_assert( +- offsetof(kernel_descriptor_t, reserved0) == 8, +- "invalid offset for reserved0"); +-static_assert( +- offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16, +- "invalid offset for kernel_code_entry_byte_offset"); +-static_assert( +- offsetof(kernel_descriptor_t, reserved1) == 24, +- "invalid offset for reserved1"); +-static_assert( +- offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44, +- "invalid offset for compute_pgm_rsrc3"); +-static_assert( +- offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48, +- "invalid offset for compute_pgm_rsrc1"); +-static_assert( +- offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52, +- "invalid offset for compute_pgm_rsrc2"); +-static_assert( +- offsetof(kernel_descriptor_t, kernel_code_properties) == 56, +- "invalid offset for kernel_code_properties"); +-static_assert( +- offsetof(kernel_descriptor_t, reserved2) == 58, +- "invalid offset for reserved2"); ++static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == ++ GROUP_SEGMENT_FIXED_SIZE_OFFSET, ++ "invalid offset for group_segment_fixed_size"); ++static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == ++ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, ++ "invalid offset for private_segment_fixed_size"); ++static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, ++ "invalid offset for reserved0"); ++static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == ++ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, ++ "invalid offset for kernel_code_entry_byte_offset"); ++static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, ++ "invalid offset for reserved1"); ++static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == ++ COMPUTE_PGM_RSRC3_OFFSET, ++ "invalid offset for compute_pgm_rsrc3"); ++static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == ++ COMPUTE_PGM_RSRC1_OFFSET, ++ "invalid offset for compute_pgm_rsrc1"); ++static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == ++ COMPUTE_PGM_RSRC2_OFFSET, ++ "invalid offset for compute_pgm_rsrc2"); ++static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == ++ KERNEL_CODE_PROPERTIES_OFFSET, ++ "invalid offset for kernel_code_properties"); ++static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, ++ "invalid offset for reserved2"); + + } // end namespace amdhsa + } // end namespace llvm +diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h ++++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +@@ -17,10 +17,11 @@ + + #include "llvm/ADT/ArrayRef.h" + #include "llvm/MC/MCContext.h" +-#include "llvm/MC/MCInstrInfo.h" + #include "llvm/MC/MCDisassembler/MCDisassembler.h" + #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" + #include "llvm/MC/MCDisassembler/MCSymbolizer.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/Support/DataExtractor.h" + + #include + #include +@@ -66,6 +67,33 @@ + DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, 
uint64_t Inst, + uint64_t Address) const; + ++ Optional onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ++ ArrayRef Bytes, ++ uint64_t Address, ++ raw_ostream &CStream) const override; ++ ++ DecodeStatus decodeKernelDescriptor(StringRef KdName, ArrayRef Bytes, ++ uint64_t KdAddress) const; ++ ++ DecodeStatus ++ decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ++ ArrayRef Bytes, ++ raw_string_ostream &KdStream) const; ++ ++ /// Decode as directives that handle COMPUTE_PGM_RSRC1. ++ /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC1. ++ /// \param KdStream - Stream to write the disassembled directives to. ++ // NOLINTNEXTLINE(readability-identifier-naming) ++ DecodeStatus decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, ++ raw_string_ostream &KdStream) const; ++ ++ /// Decode as directives that handle COMPUTE_PGM_RSRC2. ++ /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC2. ++ /// \param KdStream - Stream to write the disassembled directives to. ++ // NOLINTNEXTLINE(readability-identifier-naming) ++ DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, ++ raw_string_ostream &KdStream) const; ++ + DecodeStatus convertSDWAInst(MCInst &MI) const; + DecodeStatus convertDPP8Inst(MCInst &MI) const; + DecodeStatus convertMIMGInst(MCInst &MI) const; +diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp ++++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +@@ -34,6 +34,7 @@ + #include "llvm/MC/MCFixedLenDisassembler.h" + #include "llvm/MC/MCInst.h" + #include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/Support/AMDHSAKernelDescriptor.h" + #include "llvm/Support/Endian.h" + #include "llvm/Support/ErrorHandling.h" + #include "llvm/Support/MathExtras.h" +@@ -1215,6 +1216,350 @@ + return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; + } + ++//===----------------------------------------------------------------------===// ++// AMDGPU specific symbol handling ++//===----------------------------------------------------------------------===// ++#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ ++ do { \ ++ KdStream << Indent << DIRECTIVE " " \ ++ << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ ++ } while (0) ++ ++// NOLINTNEXTLINE(readability-identifier-naming) ++MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( ++ uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { ++ using namespace amdhsa; ++ StringRef Indent = "\t"; ++ ++ // We cannot accurately backward compute #VGPRs used from ++ // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same ++ // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we ++ // simply calculate the inverse of what the assembler does. ++ ++ uint32_t GranulatedWorkitemVGPRCount = ++ (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> ++ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; ++ ++ uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * ++ AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); ++ ++ KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; ++ ++ // We cannot backward compute values used to calculate ++ // GRANULATED_WAVEFRONT_SGPR_COUNT. 
Hence the original values for following ++ // directives can't be computed: ++ // .amdhsa_reserve_vcc ++ // .amdhsa_reserve_flat_scratch ++ // .amdhsa_reserve_xnack_mask ++ // They take their respective default values if not specified in the assembly. ++ // ++ // GRANULATED_WAVEFRONT_SGPR_COUNT ++ // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK) ++ // ++ // We compute the inverse as though all directives apart from NEXT_FREE_SGPR ++ // are set to 0. So while disassembling we consider that: ++ // ++ // GRANULATED_WAVEFRONT_SGPR_COUNT ++ // = f(NEXT_FREE_SGPR + 0 + 0 + 0) ++ // ++ // The disassembler cannot recover the original values of those 3 directives. ++ ++ uint32_t GranulatedWavefrontSGPRCount = ++ (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> ++ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; ++ ++ if (isGFX10() && GranulatedWavefrontSGPRCount) ++ return MCDisassembler::Fail; ++ ++ uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) * ++ AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); ++ ++ KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; ++ KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; ++ KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; ++ KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; ++ ++ if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY) ++ return MCDisassembler::Fail; ++ ++ PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", ++ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); ++ PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64", ++ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); ++ PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32", ++ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); ++ PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64", ++ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); ++ ++ if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) ++ return MCDisassembler::Fail; ++ ++ PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); ++ ++ if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) ++ return MCDisassembler::Fail; ++ ++ PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); ++ ++ if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) ++ return MCDisassembler::Fail; ++ ++ if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) ++ return MCDisassembler::Fail; ++ ++ PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); ++ ++ if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) ++ return MCDisassembler::Fail; ++ ++ if (isGFX10()) { ++ PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", ++ COMPUTE_PGM_RSRC1_WGP_MODE); ++ PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); ++ PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); ++ } ++ return MCDisassembler::Success; ++} ++ ++// NOLINTNEXTLINE(readability-identifier-naming) ++MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( ++ uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { ++ using namespace amdhsa; ++ StringRef Indent = "\t"; ++ PRINT_DIRECTIVE( ++ ".amdhsa_system_sgpr_private_segment_wavefront_offset", ++ COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); ++ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x", ++ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); ++ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", ++ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); ++ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z", ++ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); ++ 
PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info", ++ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); ++ PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id", ++ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); ++ ++ if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH) ++ return MCDisassembler::Fail; ++ ++ if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY) ++ return MCDisassembler::Fail; ++ ++ if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE) ++ return MCDisassembler::Fail; ++ ++ PRINT_DIRECTIVE( ++ ".amdhsa_exception_fp_ieee_invalid_op", ++ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); ++ PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src", ++ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); ++ PRINT_DIRECTIVE( ++ ".amdhsa_exception_fp_ieee_div_zero", ++ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); ++ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow", ++ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); ++ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow", ++ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); ++ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact", ++ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); ++ PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero", ++ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); ++ ++ if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0) ++ return MCDisassembler::Fail; ++ ++ return MCDisassembler::Success; ++} ++ ++#undef PRINT_DIRECTIVE ++ ++MCDisassembler::DecodeStatus ++AMDGPUDisassembler::decodeKernelDescriptorDirective( ++ DataExtractor::Cursor &Cursor, ArrayRef Bytes, ++ raw_string_ostream &KdStream) const { ++#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ ++ do { \ ++ KdStream << Indent << DIRECTIVE " " \ ++ << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ ++ } while (0) ++ ++ uint16_t TwoByteBuffer = 0; ++ uint32_t FourByteBuffer = 0; ++ uint64_t EightByteBuffer = 0; ++ ++ StringRef ReservedBytes; ++ StringRef Indent = "\t"; ++ ++ assert(Bytes.size() == 64); ++ DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8); ++ ++ switch (Cursor.tell()) { ++ case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET: ++ FourByteBuffer = DE.getU32(Cursor); ++ KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer ++ << '\n'; ++ return MCDisassembler::Success; ++ ++ case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET: ++ FourByteBuffer = DE.getU32(Cursor); ++ KdStream << Indent << ".amdhsa_private_segment_fixed_size " ++ << FourByteBuffer << '\n'; ++ return MCDisassembler::Success; ++ ++ case amdhsa::RESERVED0_OFFSET: ++ // 8 reserved bytes, must be 0. ++ EightByteBuffer = DE.getU64(Cursor); ++ if (EightByteBuffer) { ++ return MCDisassembler::Fail; ++ } ++ return MCDisassembler::Success; ++ ++ case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET: ++ // KERNEL_CODE_ENTRY_BYTE_OFFSET ++ // So far no directive controls this for Code Object V3, so simply skip for ++ // disassembly. ++ DE.skip(Cursor, 8); ++ return MCDisassembler::Success; ++ ++ case amdhsa::RESERVED1_OFFSET: ++ // 20 reserved bytes, must be 0. ++ ReservedBytes = DE.getBytes(Cursor, 20); ++ for (int I = 0; I < 20; ++I) { ++ if (ReservedBytes[I] != 0) { ++ return MCDisassembler::Fail; ++ } ++ } ++ return MCDisassembler::Success; ++ ++ case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: ++ // COMPUTE_PGM_RSRC3 ++ // - Only set for GFX10, GFX6-9 have this to be 0. ++ // - Currently no directives directly control this. 
++ FourByteBuffer = DE.getU32(Cursor); ++ if (!isGFX10() && FourByteBuffer) { ++ return MCDisassembler::Fail; ++ } ++ return MCDisassembler::Success; ++ ++ case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: ++ FourByteBuffer = DE.getU32(Cursor); ++ if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == ++ MCDisassembler::Fail) { ++ return MCDisassembler::Fail; ++ } ++ return MCDisassembler::Success; ++ ++ case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: ++ FourByteBuffer = DE.getU32(Cursor); ++ if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == ++ MCDisassembler::Fail) { ++ return MCDisassembler::Fail; ++ } ++ return MCDisassembler::Success; ++ ++ case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: ++ using namespace amdhsa; ++ TwoByteBuffer = DE.getU16(Cursor); ++ ++ PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", ++ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); ++ PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr", ++ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); ++ PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", ++ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); ++ PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr", ++ KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); ++ PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id", ++ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); ++ PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", ++ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); ++ PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", ++ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); ++ ++ if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0) ++ return MCDisassembler::Fail; ++ ++ // Reserved for GFX9 ++ if (isGFX9() && ++ (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { ++ return MCDisassembler::Fail; ++ } else if (isGFX10()) { ++ PRINT_DIRECTIVE(".amdhsa_wavefront_size32", ++ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); ++ } ++ ++ if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) ++ return MCDisassembler::Fail; ++ ++ return MCDisassembler::Success; ++ ++ case amdhsa::RESERVED2_OFFSET: ++ // 6 bytes from here are reserved, must be 0. ++ ReservedBytes = DE.getBytes(Cursor, 6); ++ for (int I = 0; I < 6; ++I) { ++ if (ReservedBytes[I] != 0) ++ return MCDisassembler::Fail; ++ } ++ return MCDisassembler::Success; ++ ++ default: ++ llvm_unreachable("Unhandled index. Case statements cover everything."); ++ return MCDisassembler::Fail; ++ } ++#undef PRINT_DIRECTIVE ++} ++ ++MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( ++ StringRef KdName, ArrayRef Bytes, uint64_t KdAddress) const { ++ // CP microcode requires the kernel descriptor to be 64 aligned. ++ if (Bytes.size() != 64 || KdAddress % 64 != 0) ++ return MCDisassembler::Fail; ++ ++ std::string Kd; ++ raw_string_ostream KdStream(Kd); ++ KdStream << ".amdhsa_kernel " << KdName << '\n'; ++ ++ DataExtractor::Cursor C(0); ++ while (C && C.tell() < Bytes.size()) { ++ MCDisassembler::DecodeStatus Status = ++ decodeKernelDescriptorDirective(C, Bytes, KdStream); ++ ++ cantFail(C.takeError()); ++ ++ if (Status == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ } ++ KdStream << ".end_amdhsa_kernel\n"; ++ outs() << KdStream.str(); ++ return MCDisassembler::Success; ++} ++ ++Optional ++AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ++ ArrayRef Bytes, uint64_t Address, ++ raw_ostream &CStream) const { ++ // Right now only kernel descriptor needs to be handled. ++ // We ignore all other symbols for target specific handling. 
++ // TODO: ++ // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code ++ // Object V2 and V3 when symbols are marked protected. ++ ++ // amd_kernel_code_t for Code Object V2. ++ if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) { ++ Size = 256; ++ return MCDisassembler::Fail; ++ } ++ ++ // Code Object V3 kernel descriptors. ++ StringRef Name = Symbol.Name; ++ if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) { ++ Size = 64; // Size = 64 regardless of success or failure. ++ return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); ++ } ++ return None; ++} ++ + //===----------------------------------------------------------------------===// + // AMDGPUSymbolizer + //===----------------------------------------------------------------------===// +diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll +--- a/llvm/test/CodeGen/AMDGPU/nop-data.ll ++++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll +@@ -1,7 +1,7 @@ + ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-code-object-v3 -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s + + ; CHECK: : +-; CHECK-NEXT: s_endpgm ++; CHECK: s_endpgm + define amdgpu_kernel void @kernel0() align 256 { + entry: + ret void +@@ -80,7 +80,7 @@ + + ; CHECK-EMPTY: + ; CHECK-NEXT: : +-; CHECK-NEXT: s_endpgm ++; CHECK: s_endpgm + define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) align 256 { + entry: + ret void +diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s +new file mode 100644 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s +@@ -0,0 +1,37 @@ ++;; Failure test. We create a malformed kernel descriptor (KD) by manually ++;; setting the bytes, because one can't create a malformed KD using the ++;; assembler directives. ++ ++; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t.o ++ ++; RUN: printf ".type my_kernel.kd, @object \nmy_kernel.kd:\n.size my_kernel.kd, 64\n" > %t1.sym_info ++; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t.o \ ++; RUN: | tail -n +9 > %t1.sym_content ++; RUN: cat %t1.sym_info %t1.sym_content > %t1.s ++ ++; RUN: llvm-mc %t1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t-re-assemble.o ++; RUN: diff %t.o %t-re-assemble.o ++ ++;; Test failure by setting one of the reserved bytes to non-zero value. ++ ++.type my_kernel.kd, @object ++.size my_kernel.kd, 64 ++my_kernel.kd: ++ .long 0x00000000 ;; group_segment_fixed_size ++ .long 0x00000000 ;; private_segment_fixed_size ++ .quad 0x00FF000000000000 ;; reserved bytes. ++ .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. ++ ++ ;; 20 reserved bytes. ++ .quad 0x0000000000000000 ++ .quad 0x0000000000000000 ++ .long 0x00000000 ++ ++ .long 0x00000000 ;; compute_PGM_RSRC3 ++ .long 0x00000000 ;; compute_PGM_RSRC1 ++ .long 0x00000000 ;; compute_PGM_RSRC2 ++ .short 0x0000 ;; additional fields. ++ ++ ;; 6 reserved bytes. ++ .long 0x0000000 ++ .short 0x0000 +diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s +new file mode 100644 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s +@@ -0,0 +1,49 @@ ++;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor. 
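++;;
++;; Rough sketch of the inverse the disassembler applies (an informal summary
++;; of decodeCOMPUTE_PGM_RSRC1 above, not a spec): it prints
++;;   .amdhsa_next_free_sgpr = (GRANULATED_WAVEFRONT_SGPR_COUNT + 1) * granule
++;; (granule = the target's SGPR encoding granule) and assumes the
++;; vcc/flat_scratch/xnack_mask reservations are 0, so kernels that reach the
++;; same granulated count through different directives should still
++;; round-trip byte-for-byte, which is what the diffs below verify.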
++ ++; RUN: split-file %s %t.dir ++ ++; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 ++; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ ++; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble ++; RUN: diff %t1 %t1-re-assemble ++ ++; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 ++; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ ++; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble ++; RUN: diff %t2 %t2-re-assemble ++ ++; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 ++; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ ++; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble ++; RUN: diff %t3 %t3-re-assemble ++ ++ ++;--- 1.s ++;; Only set next_free_sgpr. ++.amdhsa_kernel my_kernel_1 ++ .amdhsa_next_free_vgpr 0 ++ .amdhsa_next_free_sgpr 42 ++ .amdhsa_reserve_flat_scratch 0 ++ .amdhsa_reserve_xnack_mask 0 ++ .amdhsa_reserve_vcc 0 ++.end_amdhsa_kernel ++ ++;--- 2.s ++;; Only set other directives. ++.amdhsa_kernel my_kernel_2 ++ .amdhsa_next_free_vgpr 0 ++ .amdhsa_next_free_sgpr 0 ++ .amdhsa_reserve_flat_scratch 1 ++ .amdhsa_reserve_xnack_mask 1 ++ .amdhsa_reserve_vcc 1 ++.end_amdhsa_kernel ++ ++;--- 3.s ++;; Set all affecting directives. ++.amdhsa_kernel my_kernel_3 ++ .amdhsa_next_free_vgpr 0 ++ .amdhsa_next_free_sgpr 35 ++ .amdhsa_reserve_flat_scratch 1 ++ .amdhsa_reserve_xnack_mask 1 ++ .amdhsa_reserve_vcc 1 ++.end_amdhsa_kernel +diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s +new file mode 100644 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s +@@ -0,0 +1,36 @@ ++;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor. 
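++;;
++;; Rough sketch (informal): the descriptor stores only the granulated count,
++;; and the disassembler recovers
++;;   .amdhsa_next_free_vgpr = (GRANULATED_WORKITEM_VGPR_COUNT + 1) * granule
++;; (granule = the target's VGPR encoding granule), so inputs such as 23 or 14
++;; below should reassemble to the same granulated count even though the exact
++;; original vgpr number is not recoverable.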
++ ++; RUN: split-file %s %t.dir ++ ++; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 ++; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ ++; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble ++; RUN: diff %t1 %t1-re-assemble ++ ++; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 ++; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ ++; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble ++; RUN: diff %t2 %t2-re-assemble ++ ++; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 ++; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ ++; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble ++; RUN: diff %t3 %t3-re-assemble ++ ++;--- 1.s ++.amdhsa_kernel my_kernel_1 ++ .amdhsa_next_free_vgpr 23 ++ .amdhsa_next_free_sgpr 0 ++.end_amdhsa_kernel ++ ++;--- 2.s ++.amdhsa_kernel my_kernel_2 ++ .amdhsa_next_free_vgpr 14 ++ .amdhsa_next_free_sgpr 0 ++.end_amdhsa_kernel ++ ++;--- 3.s ++.amdhsa_kernel my_kernel_3 ++ .amdhsa_next_free_vgpr 32 ++ .amdhsa_next_free_sgpr 0 ++.end_amdhsa_kernel +diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s +new file mode 100644 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s +@@ -0,0 +1,58 @@ ++;; Entirely zeroed kernel descriptor (for GFX10). ++ ++; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t ++; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s ++ ++;; TODO: ++;; This file and kd-zeroed-raw.s should produce the same output for the kernel ++;; descriptor - a block of 64 zeroed bytes. But looks like the assembler sets ++;; the FWD_PROGRESS bit in COMPUTE_PGM_RSRC1 to 1 even when the directive ++;; mentions 0 (see line 36). ++ ++;; Check the raw bytes right now. 
++ ++; OBJDUMP: 0000 00000000 00000000 00000000 00000000 ++; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 ++; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 ++; OBJDUMP-NEXT: 0030 01000000 00000000 00000000 00000000 ++ ++.amdhsa_kernel my_kernel ++ .amdhsa_group_segment_fixed_size 0 ++ .amdhsa_private_segment_fixed_size 0 ++ .amdhsa_next_free_vgpr 8 ++ .amdhsa_reserve_vcc 0 ++ .amdhsa_reserve_flat_scratch 0 ++ .amdhsa_reserve_xnack_mask 0 ++ .amdhsa_next_free_sgpr 8 ++ .amdhsa_float_round_mode_32 0 ++ .amdhsa_float_round_mode_16_64 0 ++ .amdhsa_float_denorm_mode_32 0 ++ .amdhsa_float_denorm_mode_16_64 0 ++ .amdhsa_dx10_clamp 0 ++ .amdhsa_ieee_mode 0 ++ .amdhsa_fp16_overflow 0 ++ .amdhsa_workgroup_processor_mode 0 ++ .amdhsa_memory_ordered 0 ++ .amdhsa_forward_progress 0 ++ .amdhsa_system_sgpr_private_segment_wavefront_offset 0 ++ .amdhsa_system_sgpr_workgroup_id_x 0 ++ .amdhsa_system_sgpr_workgroup_id_y 0 ++ .amdhsa_system_sgpr_workgroup_id_z 0 ++ .amdhsa_system_sgpr_workgroup_info 0 ++ .amdhsa_system_vgpr_workitem_id 0 ++ .amdhsa_exception_fp_ieee_invalid_op 0 ++ .amdhsa_exception_fp_denorm_src 0 ++ .amdhsa_exception_fp_ieee_div_zero 0 ++ .amdhsa_exception_fp_ieee_overflow 0 ++ .amdhsa_exception_fp_ieee_underflow 0 ++ .amdhsa_exception_fp_ieee_inexact 0 ++ .amdhsa_exception_int_div_zero 0 ++ .amdhsa_user_sgpr_private_segment_buffer 0 ++ .amdhsa_user_sgpr_dispatch_ptr 0 ++ .amdhsa_user_sgpr_queue_ptr 0 ++ .amdhsa_user_sgpr_kernarg_segment_ptr 0 ++ .amdhsa_user_sgpr_dispatch_id 0 ++ .amdhsa_user_sgpr_flat_scratch_init 0 ++ .amdhsa_user_sgpr_private_segment_size 0 ++ .amdhsa_wavefront_size32 0 ++.end_amdhsa_kernel +diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s +new file mode 100644 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s +@@ -0,0 +1,53 @@ ++;; Entirely zeroed kernel descriptor (for GFX9). ++ ++; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 ++; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ ++; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 ++; RUN: diff %t1 %t2 ++ ++; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s ++ ++; OBJDUMP: 0000 00000000 00000000 00000000 00000000 ++; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 ++; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 ++; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 ++ ++;; This file and kd-zeroed-raw.s produce the same output for the kernel ++;; descriptor - a block of 64 zeroed bytes. 
++ ++.amdhsa_kernel my_kernel ++ .amdhsa_group_segment_fixed_size 0 ++ .amdhsa_private_segment_fixed_size 0 ++ .amdhsa_next_free_vgpr 0 ++ .amdhsa_reserve_vcc 0 ++ .amdhsa_reserve_flat_scratch 0 ++ .amdhsa_reserve_xnack_mask 0 ++ .amdhsa_next_free_sgpr 0 ++ .amdhsa_float_round_mode_32 0 ++ .amdhsa_float_round_mode_16_64 0 ++ .amdhsa_float_denorm_mode_32 0 ++ .amdhsa_float_denorm_mode_16_64 0 ++ .amdhsa_dx10_clamp 0 ++ .amdhsa_ieee_mode 0 ++ .amdhsa_fp16_overflow 0 ++ .amdhsa_system_sgpr_private_segment_wavefront_offset 0 ++ .amdhsa_system_sgpr_workgroup_id_x 0 ++ .amdhsa_system_sgpr_workgroup_id_y 0 ++ .amdhsa_system_sgpr_workgroup_id_z 0 ++ .amdhsa_system_sgpr_workgroup_info 0 ++ .amdhsa_system_vgpr_workitem_id 0 ++ .amdhsa_exception_fp_ieee_invalid_op 0 ++ .amdhsa_exception_fp_denorm_src 0 ++ .amdhsa_exception_fp_ieee_div_zero 0 ++ .amdhsa_exception_fp_ieee_overflow 0 ++ .amdhsa_exception_fp_ieee_underflow 0 ++ .amdhsa_exception_fp_ieee_inexact 0 ++ .amdhsa_exception_int_div_zero 0 ++ .amdhsa_user_sgpr_private_segment_buffer 0 ++ .amdhsa_user_sgpr_dispatch_ptr 0 ++ .amdhsa_user_sgpr_queue_ptr 0 ++ .amdhsa_user_sgpr_kernarg_segment_ptr 0 ++ .amdhsa_user_sgpr_dispatch_id 0 ++ .amdhsa_user_sgpr_flat_scratch_init 0 ++ .amdhsa_user_sgpr_private_segment_size 0 ++.end_amdhsa_kernel +diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s +new file mode 100644 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s +@@ -0,0 +1,41 @@ ++; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 ++; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ ++; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 ++; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s ++ ++;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details). ++;; kd-zeroed-raw.s and kd-zeroed-*.s should produce the same output for the ++;; kernel descriptor - a block of 64 zeroed bytes. ++ ++;; The disassembly will produce the contents of kd-zeroed-*.s which on being ++;; assembled contains additional relocation info. A diff over the entire object ++;; will fail in this case. So we check by looking the bytes in .text. ++ ++; OBJDUMP: 0000 00000000 00000000 00000000 00000000 ++; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 ++; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 ++; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 ++ ++;; The entire object is zeroed out. ++ ++.type my_kernel.kd, @object ++.size my_kernel.kd, 64 ++my_kernel.kd: ++ .long 0x00000000 ;; group_segment_fixed_size ++ .long 0x00000000 ;; private_segment_fixed_size ++ .quad 0x0000000000000000 ;; reserved bytes. ++ .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. ++ ++ ;; 20 reserved bytes. ++ .quad 0x0000000000000000 ++ .quad 0x0000000000000000 ++ .long 0x00000000 ++ ++ .long 0x00000000 ;; compute_PGM_RSRC3 ++ .long 0x00000000 ;; compute_PGM_RSRC1 ++ .long 0x00000000 ;; compute_PGM_RSRC2 ++ .short 0x0000 ;; additional fields. ++ ++ ;; 6 reserved bytes. 
++ .long 0x0000000 ++ .short 0x0000 +diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp +--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp ++++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp +@@ -1854,23 +1854,6 @@ + outs() << SectionName << ":\n"; + } + +- if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { +- if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) { +- // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) +- Start += 256; +- } +- if (SI == SE - 1 || +- Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) { +- // cut trailing zeroes at the end of kernel +- // cut up to 256 bytes +- const uint64_t EndAlign = 256; +- const auto Limit = End - (std::min)(EndAlign, End - Start); +- while (End > Limit && +- *reinterpret_cast(&Bytes[End - 4]) == 0) +- End -= 4; +- } +- } +- + outs() << '\n'; + if (!NoLeadingAddr) + outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", + diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index d1c2147536a72..48a09ac48005d 100644 --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -162,39 +162,49 @@ struct kernel_descriptor_t { uint8_t reserved2[6]; }; +enum : uint32_t { + GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0, + PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4, + RESERVED0_OFFSET = 8, + KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, + RESERVED1_OFFSET = 24, + COMPUTE_PGM_RSRC3_OFFSET = 44, + COMPUTE_PGM_RSRC1_OFFSET = 48, + COMPUTE_PGM_RSRC2_OFFSET = 52, + KERNEL_CODE_PROPERTIES_OFFSET = 56, + RESERVED2_OFFSET = 58, +}; + static_assert( sizeof(kernel_descriptor_t) == 64, "invalid size for kernel_descriptor_t"); -static_assert( - offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0, - "invalid offset for group_segment_fixed_size"); -static_assert( - offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4, - "invalid offset for private_segment_fixed_size"); -static_assert( - offsetof(kernel_descriptor_t, reserved0) == 8, - "invalid offset for reserved0"); -static_assert( - offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16, - "invalid offset for kernel_code_entry_byte_offset"); -static_assert( - offsetof(kernel_descriptor_t, reserved1) == 24, - "invalid offset for reserved1"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44, - "invalid offset for compute_pgm_rsrc3"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48, - "invalid offset for compute_pgm_rsrc1"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52, - "invalid offset for compute_pgm_rsrc2"); -static_assert( - offsetof(kernel_descriptor_t, kernel_code_properties) == 56, - "invalid offset for kernel_code_properties"); -static_assert( - offsetof(kernel_descriptor_t, reserved2) == 58, - "invalid offset for reserved2"); +static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == + GROUP_SEGMENT_FIXED_SIZE_OFFSET, + "invalid offset for group_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == + PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, + "invalid offset for private_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, + "invalid offset for reserved0"); +static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == + KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, + "invalid offset for 
kernel_code_entry_byte_offset"); +static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, + "invalid offset for reserved1"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == + COMPUTE_PGM_RSRC3_OFFSET, + "invalid offset for compute_pgm_rsrc3"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == + COMPUTE_PGM_RSRC1_OFFSET, + "invalid offset for compute_pgm_rsrc1"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == + COMPUTE_PGM_RSRC2_OFFSET, + "invalid offset for compute_pgm_rsrc2"); +static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == + KERNEL_CODE_PROPERTIES_OFFSET, + "invalid offset for kernel_code_properties"); +static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, + "invalid offset for reserved2"); } // end namespace amdhsa } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index b7dde61f608bf..5955cc75c8ea2 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -34,6 +34,7 @@ #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -1225,6 +1226,350 @@ bool AMDGPUDisassembler::isGFX10() const { return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; } +//===----------------------------------------------------------------------===// +// AMDGPU specific symbol handling +//===----------------------------------------------------------------------===// +#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << DIRECTIVE " " \ + << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + } while (0) + +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + + // We cannot accurately backward compute #VGPRs used from + // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same + // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we + // simply calculate the inverse of what the assembler does. + + uint32_t GranulatedWorkitemVGPRCount = + (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; + + uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * + AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); + + KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; + + // We cannot backward compute values used to calculate + // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following + // directives can't be computed: + // .amdhsa_reserve_vcc + // .amdhsa_reserve_flat_scratch + // .amdhsa_reserve_xnack_mask + // They take their respective default values if not specified in the assembly. + // + // GRANULATED_WAVEFRONT_SGPR_COUNT + // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK) + // + // We compute the inverse as though all directives apart from NEXT_FREE_SGPR + // are set to 0. 
So while disassembling we consider that: + // + // GRANULATED_WAVEFRONT_SGPR_COUNT + // = f(NEXT_FREE_SGPR + 0 + 0 + 0) + // + // The disassembler cannot recover the original values of those 3 directives. + + uint32_t GranulatedWavefrontSGPRCount = + (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; + + if (isGFX10() && GranulatedWavefrontSGPRCount) + return MCDisassembler::Fail; + + uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) * + AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); + + KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; + KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; + KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; + KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); + PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64", + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); + PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32", + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); + PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64", + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) + return MCDisassembler::Fail; + + if (isGFX10()) { + PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", + COMPUTE_PGM_RSRC1_WGP_MODE); + PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); + PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); + } + return MCDisassembler::Success; +} + +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + PRINT_DIRECTIVE( + ".amdhsa_system_sgpr_private_segment_wavefront_offset", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); + PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id", + COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE( + ".amdhsa_exception_fp_ieee_invalid_op", + 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); + PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); + PRINT_DIRECTIVE( + ".amdhsa_exception_fp_ieee_div_zero", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); + PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0) + return MCDisassembler::Fail; + + return MCDisassembler::Success; +} + +#undef PRINT_DIRECTIVE + +MCDisassembler::DecodeStatus +AMDGPUDisassembler::decodeKernelDescriptorDirective( + DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes, + raw_string_ostream &KdStream) const { +#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << DIRECTIVE " " \ + << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + } while (0) + + uint16_t TwoByteBuffer = 0; + uint32_t FourByteBuffer = 0; + uint64_t EightByteBuffer = 0; + + StringRef ReservedBytes; + StringRef Indent = "\t"; + + assert(Bytes.size() == 64); + DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8); + + switch (Cursor.tell()) { + case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer + << '\n'; + return MCDisassembler::Success; + + case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_private_segment_fixed_size " + << FourByteBuffer << '\n'; + return MCDisassembler::Success; + + case amdhsa::RESERVED0_OFFSET: + // 8 reserved bytes, must be 0. + EightByteBuffer = DE.getU64(Cursor); + if (EightByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET: + // KERNEL_CODE_ENTRY_BYTE_OFFSET + // So far no directive controls this for Code Object V3, so simply skip for + // disassembly. + DE.skip(Cursor, 8); + return MCDisassembler::Success; + + case amdhsa::RESERVED1_OFFSET: + // 20 reserved bytes, must be 0. + ReservedBytes = DE.getBytes(Cursor, 20); + for (int I = 0; I < 20; ++I) { + if (ReservedBytes[I] != 0) { + return MCDisassembler::Fail; + } + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: + // COMPUTE_PGM_RSRC3 + // - Only set for GFX10, GFX6-9 have this to be 0. + // - Currently no directives directly control this.
+ FourByteBuffer = DE.getU32(Cursor); + if (!isGFX10() && FourByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == + MCDisassembler::Fail) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == + MCDisassembler::Fail) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: + using namespace amdhsa; + TwoByteBuffer = DE.getU16(Cursor); + + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); + + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0) + return MCDisassembler::Fail; + + // Reserved for GFX9 + if (isGFX9() && + (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { + return MCDisassembler::Fail; + } else if (isGFX10()) { + PRINT_DIRECTIVE(".amdhsa_wavefront_size32", + KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + } + + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) + return MCDisassembler::Fail; + + return MCDisassembler::Success; + + case amdhsa::RESERVED2_OFFSET: + // 6 bytes from here are reserved, must be 0. + ReservedBytes = DE.getBytes(Cursor, 6); + for (int I = 0; I < 6; ++I) { + if (ReservedBytes[I] != 0) + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + default: + llvm_unreachable("Unhandled index. Case statements cover everything."); + return MCDisassembler::Fail; + } +#undef PRINT_DIRECTIVE +} + +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( + StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const { + // CP microcode requires the kernel descriptor to be 64-byte aligned. + if (Bytes.size() != 64 || KdAddress % 64 != 0) + return MCDisassembler::Fail; + + std::string Kd; + raw_string_ostream KdStream(Kd); + KdStream << ".amdhsa_kernel " << KdName << '\n'; + + DataExtractor::Cursor C(0); + while (C && C.tell() < Bytes.size()) { + MCDisassembler::DecodeStatus Status = + decodeKernelDescriptorDirective(C, Bytes, KdStream); + + cantFail(C.takeError()); + + if (Status == MCDisassembler::Fail) + return MCDisassembler::Fail; + } + KdStream << ".end_amdhsa_kernel\n"; + outs() << KdStream.str(); + return MCDisassembler::Success; +} + +Optional<MCDisassembler::DecodeStatus> +AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &CStream) const { + // Right now only kernel descriptor needs to be handled. + // We ignore all other symbols for target specific handling. + // TODO: + // Fix the spurious symbol issue for AMDGPU kernels.
Exists for both Code + // Object V2 and V3 when symbols are marked protected. + + // amd_kernel_code_t for Code Object V2. + if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) { + Size = 256; + return MCDisassembler::Fail; + } + + // Code Object V3 kernel descriptors. + StringRef Name = Symbol.Name; + if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) { + Size = 64; // Size = 64 regardless of success or failure. + return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); + } + return None; +} + //===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index f975af409a096..315602c35288c 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -17,10 +17,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/DataExtractor.h" #include #include @@ -66,6 +67,33 @@ class AMDGPUDisassembler : public MCDisassembler { DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst, uint64_t Address) const; + Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address, + raw_ostream &CStream) const override; + + DecodeStatus decodeKernelDescriptor(StringRef KdName, ArrayRef<uint8_t> Bytes, + uint64_t KdAddress) const; + + DecodeStatus + decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, + ArrayRef<uint8_t> Bytes, + raw_string_ostream &KdStream) const; + + /// Decode as directives that handle COMPUTE_PGM_RSRC1. + /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC1. + /// \param KdStream - Stream to write the disassembled directives to.
+ // NOLINTNEXTLINE(readability-identifier-naming) + DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, + raw_string_ostream &KdStream) const; + DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll index 7b6853acce285..e21ca97e8ffca 100644 --- a/llvm/test/CodeGen/AMDGPU/nop-data.ll +++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-code-object-v3 -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s ; CHECK: : -; CHECK-NEXT: s_endpgm +; CHECK: s_endpgm define amdgpu_kernel void @kernel0() align 256 { entry: ret void @@ -80,7 +80,7 @@ entry: ; CHECK-EMPTY: ; CHECK-NEXT: : -; CHECK-NEXT: s_endpgm +; CHECK: s_endpgm define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) align 256 { entry: ret void diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s new file mode 100644 index 0000000000000..eee3fd4b7103e --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s @@ -0,0 +1,37 @@ +;; Failure test. We create a malformed kernel descriptor (KD) by manually +;; setting the bytes, because one can't create a malformed KD using the +;; assembler directives. + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t.o + +; RUN: printf ".type my_kernel.kd, @object \nmy_kernel.kd:\n.size my_kernel.kd, 64\n" > %t1.sym_info +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t.o \ +; RUN: | tail -n +9 > %t1.sym_content +; RUN: cat %t1.sym_info %t1.sym_content > %t1.s + +; RUN: llvm-mc %t1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t-re-assemble.o +; RUN: diff %t.o %t-re-assemble.o + +;; Test failure by setting one of the reserved bytes to non-zero value. + +.type my_kernel.kd, @object +.size my_kernel.kd, 64 +my_kernel.kd: + .long 0x00000000 ;; group_segment_fixed_size + .long 0x00000000 ;; private_segment_fixed_size + .quad 0x00FF000000000000 ;; reserved bytes. + .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. + + ;; 20 reserved bytes. + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .long 0x00000000 + + .long 0x00000000 ;; compute_PGM_RSRC3 + .long 0x00000000 ;; compute_PGM_RSRC1 + .long 0x00000000 ;; compute_PGM_RSRC2 + .short 0x0000 ;; additional fields. + + ;; 6 reserved bytes. + .long 0x0000000 + .short 0x0000 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s new file mode 100644 index 0000000000000..0b798a298d398 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s @@ -0,0 +1,49 @@ +;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor. 
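+;; A sketch of the round-trip arithmetic these cases exercise (an editorial walk-through, assuming the SGPR encoding granule of 8 returned by IsaInfo::getSGPREncodingGranule here, and two SGPRs reserved each for VCC, FLAT_SCRATCH and XNACK_MASK): +;; encode: GRANULATED_WAVEFRONT_SGPR_COUNT = ceil(total SGPRs / 8) - 1 +;; decode: .amdhsa_next_free_sgpr = (GRANULATED_WAVEFRONT_SGPR_COUNT + 1) * 8 +;; 1.s: total = 42 -> granulated count 5 -> decoded next_free_sgpr 48. +;; 2.s: total = 0 + 2 + 2 + 2 = 6 -> granulated count 0 -> decoded next_free_sgpr 8. +;; 3.s: total = 35 + 2 + 2 + 2 = 41 -> granulated count 5 -> decoded next_free_sgpr 48, with the three reserve directives printed as 0. +;; Re-assembling the decoded values yields the same granulated count, which is why the byte-for-byte diffs below pass.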
+ +; RUN: split-file %s %t.dir + +; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble +; RUN: diff %t1 %t1-re-assemble + +; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble +; RUN: diff %t2 %t2-re-assemble + +; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble +; RUN: diff %t3 %t3-re-assemble + + +;--- 1.s +;; Only set next_free_sgpr. +.amdhsa_kernel my_kernel_1 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 42 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_reserve_vcc 0 +.end_amdhsa_kernel + +;--- 2.s +;; Only set other directives. +.amdhsa_kernel my_kernel_2 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_reserve_flat_scratch 1 + .amdhsa_reserve_xnack_mask 1 + .amdhsa_reserve_vcc 1 +.end_amdhsa_kernel + +;--- 3.s +;; Set all affecting directives. +.amdhsa_kernel my_kernel_3 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 35 + .amdhsa_reserve_flat_scratch 1 + .amdhsa_reserve_xnack_mask 1 + .amdhsa_reserve_vcc 1 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s new file mode 100644 index 0000000000000..a8883d2f74be7 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s @@ -0,0 +1,36 @@ +;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor. 
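+;; The same round-trip as in kd-sgpr.s, but for the VGPR field (an editorial walk-through, assuming the VGPR encoding granule of 4 returned by IsaInfo::getVGPREncodingGranule here): +;; encode: GRANULATED_WORKITEM_VGPR_COUNT = ceil(next_free_vgpr / 4) - 1 +;; decode: .amdhsa_next_free_vgpr = (GRANULATED_WORKITEM_VGPR_COUNT + 1) * 4 +;; 1.s: 23 -> granulated 5 -> decoded as 24; 2.s: 14 -> granulated 3 -> decoded as 16; 3.s: 32 -> granulated 7 -> decoded as 32. +;; The decoded value is always a multiple of the granule, so re-assembly reproduces the same field and the diffs pass.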
+ +; RUN: split-file %s %t.dir + +; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble +; RUN: diff %t1 %t1-re-assemble + +; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble +; RUN: diff %t2 %t2-re-assemble + +; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble +; RUN: diff %t3 %t3-re-assemble + +;--- 1.s +.amdhsa_kernel my_kernel_1 + .amdhsa_next_free_vgpr 23 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +;--- 2.s +.amdhsa_kernel my_kernel_2 + .amdhsa_next_free_vgpr 14 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +;--- 3.s +.amdhsa_kernel my_kernel_3 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s new file mode 100644 index 0000000000000..803507a130c03 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s @@ -0,0 +1,58 @@ +;; Entirely zeroed kernel descriptor (for GFX10). + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t +; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s + +;; TODO: +;; This file and kd-zeroed-raw.s should produce the same output for the kernel +;; descriptor - a block of 64 zeroed bytes. But looks like the assembler sets +;; the FWD_PROGRESS bit in COMPUTE_PGM_RSRC1 to 1 even when the directive +;; mentions 0 (see line 36). + +;; Check the raw bytes right now. 
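+;; (For orientation: per the offset table this patch adds to AMDHSAKernelDescriptor.h, COMPUTE_PGM_RSRC1 occupies bytes 48-51 of the descriptor, i.e. 0x30-0x33, so the stray nonzero byte shows up at the start of the 0030 row checked below.)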
+ +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 01000000 00000000 00000000 00000000 + +.amdhsa_kernel my_kernel + .amdhsa_group_segment_fixed_size 0 + .amdhsa_private_segment_fixed_size 0 + .amdhsa_next_free_vgpr 8 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_next_free_sgpr 8 + .amdhsa_float_round_mode_32 0 + .amdhsa_float_round_mode_16_64 0 + .amdhsa_float_denorm_mode_32 0 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 0 + .amdhsa_workgroup_processor_mode 0 + .amdhsa_memory_ordered 0 + .amdhsa_forward_progress 0 + .amdhsa_system_sgpr_private_segment_wavefront_offset 0 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 0 + .amdhsa_system_sgpr_workgroup_id_z 0 + .amdhsa_system_sgpr_workgroup_info 0 + .amdhsa_system_vgpr_workitem_id 0 + .amdhsa_exception_fp_ieee_invalid_op 0 + .amdhsa_exception_fp_denorm_src 0 + .amdhsa_exception_fp_ieee_div_zero 0 + .amdhsa_exception_fp_ieee_overflow 0 + .amdhsa_exception_fp_ieee_underflow 0 + .amdhsa_exception_fp_ieee_inexact 0 + .amdhsa_exception_int_div_zero 0 + .amdhsa_user_sgpr_private_segment_buffer 0 + .amdhsa_user_sgpr_dispatch_ptr 0 + .amdhsa_user_sgpr_queue_ptr 0 + .amdhsa_user_sgpr_kernarg_segment_ptr 0 + .amdhsa_user_sgpr_dispatch_id 0 + .amdhsa_user_sgpr_flat_scratch_init 0 + .amdhsa_user_sgpr_private_segment_size 0 + .amdhsa_wavefront_size32 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s new file mode 100644 index 0000000000000..de4fdf74d88e0 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s @@ -0,0 +1,53 @@ +;; Entirely zeroed kernel descriptor (for GFX9). + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ +; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: diff %t1 %t2 + +; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s + +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 + +;; This file and kd-zeroed-raw.s produce the same output for the kernel +;; descriptor - a block of 64 zeroed bytes. 
+ +.amdhsa_kernel my_kernel + .amdhsa_group_segment_fixed_size 0 + .amdhsa_private_segment_fixed_size 0 + .amdhsa_next_free_vgpr 0 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_float_round_mode_32 0 + .amdhsa_float_round_mode_16_64 0 + .amdhsa_float_denorm_mode_32 0 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 0 + .amdhsa_system_sgpr_private_segment_wavefront_offset 0 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 0 + .amdhsa_system_sgpr_workgroup_id_z 0 + .amdhsa_system_sgpr_workgroup_info 0 + .amdhsa_system_vgpr_workitem_id 0 + .amdhsa_exception_fp_ieee_invalid_op 0 + .amdhsa_exception_fp_denorm_src 0 + .amdhsa_exception_fp_ieee_div_zero 0 + .amdhsa_exception_fp_ieee_overflow 0 + .amdhsa_exception_fp_ieee_underflow 0 + .amdhsa_exception_fp_ieee_inexact 0 + .amdhsa_exception_int_div_zero 0 + .amdhsa_user_sgpr_private_segment_buffer 0 + .amdhsa_user_sgpr_dispatch_ptr 0 + .amdhsa_user_sgpr_queue_ptr 0 + .amdhsa_user_sgpr_kernarg_segment_ptr 0 + .amdhsa_user_sgpr_dispatch_id 0 + .amdhsa_user_sgpr_flat_scratch_init 0 + .amdhsa_user_sgpr_private_segment_size 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s new file mode 100644 index 0000000000000..85554209d5d8f --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s @@ -0,0 +1,41 @@ +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ +; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s + +;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details). +;; kd-zeroed-raw.s and kd-zeroed-*.s should produce the same output for the +;; kernel descriptor - a block of 64 zeroed bytes. + +;; The disassembly will produce the contents of kd-zeroed-*.s which on being +;; assembled contains additional relocation info. A diff over the entire object +;; will fail in this case. So we check by looking the bytes in .text. + +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 + +;; The entire object is zeroed out. + +.type my_kernel.kd, @object +.size my_kernel.kd, 64 +my_kernel.kd: + .long 0x00000000 ;; group_segment_fixed_size + .long 0x00000000 ;; private_segment_fixed_size + .quad 0x0000000000000000 ;; reserved bytes. + .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. + + ;; 20 reserved bytes. + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .long 0x00000000 + + .long 0x00000000 ;; compute_PGM_RSRC3 + .long 0x00000000 ;; compute_PGM_RSRC1 + .long 0x00000000 ;; compute_PGM_RSRC2 + .short 0x0000 ;; additional fields. + + ;; 6 reserved bytes. 
+ .long 0x0000000 + .short 0x0000 diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index b63d08b90ff51..46ed7414dbb31 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1854,23 +1854,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, outs() << SectionName << ":\n"; } - if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { - if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) { - // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) - Start += 256; - } - if (SI == SE - 1 || - Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) { - // cut trailing zeroes at the end of kernel - // cut up to 256 bytes - const uint64_t EndAlign = 256; - const auto Limit = End - (std::min)(EndAlign, End - Start); - while (End > Limit && - *reinterpret_cast<const support::ulittle32_t *>(&Bytes[End - 4]) == 0) - End -= 4; - } - } - outs() << '\n'; if (!NoLeadingAddr) outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", From bcd8422d75069624dc2daf7e5ff4b4f6cbcd6b71 Mon Sep 17 00:00:00 2001 From: Adam Czachorowski Date: Wed, 7 Oct 2020 17:02:16 +0200 Subject: [PATCH 233/321] [clangd] Fix argument type (bool->float). The default value is 1.3f, but it was cast to true, which is not a good base for code completion score. Differential Revision: https://reviews.llvm.org/D88970 --- clang-tools-extra/clangd/tool/ClangdMain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 98daaf9573597..78d8355a2c5d9 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -185,7 +185,7 @@ opt RankingModel{ Hidden, }; -opt<bool> DecisionForestBase{ +opt<float> DecisionForestBase{ "decision-forest-base", cat(Features), desc("Base for exponentiating the prediction from DecisionForest."), From 63dead2096cd6a2190ba11071938b937be8bf159 Mon Sep 17 00:00:00 2001 From: Arjun P Date: Wed, 7 Oct 2020 17:16:11 +0200 Subject: [PATCH 234/321] Introduce subtraction for FlatAffineConstraints Subtraction is a foundational arithmetic operation that is often used when computing, for example, data transfer sets or cache hits. Since the result of subtraction need not be a convex polytope, a new class `PresburgerSet` is introduced to represent unions of convex polytopes.
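As a usage sketch (an illustrative, hypothetical example built only from the declarations this patch introduces; each constraint is given as [dimension coefficients..., constant term], following the FlatAffineConstraints layout):

    // The interval [2, 8]: x >= 2 and x <= 8.
    FlatAffineConstraints interval(/*numDims=*/1);
    interval.addInequality({1, -2});
    interval.addInequality({-1, 8});

    // The interval [4, 5]: x >= 4 and x <= 5.
    FlatAffineConstraints hole(/*numDims=*/1);
    hole.addInequality({1, -4});
    hole.addInequality({-1, 5});

    // [2, 8] \ [4, 5] = [2, 3] U [6, 8] is not convex, so the result is a
    // PresburgerSet (a union of FlatAffineConstraints) rather than a single
    // FlatAffineConstraints.
    PresburgerSet diff = PresburgerSet(interval).subtract(PresburgerSet(hole));
    assert(diff.containsPoint({2}) && !diff.containsPoint({4}) &&
           diff.containsPoint({6}));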
Reviewed By: ftynse, bondhugula Differential Revision: https://reviews.llvm.org/D87068 --- mlir/include/mlir/Analysis/AffineStructures.h | 11 + .../mlir/Analysis/Presburger/Simplex.h | 3 + mlir/include/mlir/Analysis/PresburgerSet.h | 112 ++++ mlir/lib/Analysis/AffineStructures.cpp | 27 + mlir/lib/Analysis/CMakeLists.txt | 5 +- mlir/lib/Analysis/Presburger/CMakeLists.txt | 2 +- mlir/lib/Analysis/Presburger/Simplex.cpp | 10 + mlir/lib/Analysis/PresburgerSet.cpp | 316 +++++++++++ .../Analysis/AffineStructuresTest.cpp | 26 +- mlir/unittests/Analysis/CMakeLists.txt | 1 + .../Analysis/Presburger/CMakeLists.txt | 1 + mlir/unittests/Analysis/PresburgerSetTest.cpp | 524 ++++++++++++++++++ 12 files changed, 1015 insertions(+), 23 deletions(-) create mode 100644 mlir/include/mlir/Analysis/PresburgerSet.h create mode 100644 mlir/lib/Analysis/PresburgerSet.cpp create mode 100644 mlir/unittests/Analysis/PresburgerSetTest.cpp diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index d64a24e713d13..25071db100e32 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -97,6 +97,13 @@ class FlatAffineConstraints { ids.append(idArgs.begin(), idArgs.end()); } + /// Return a system with no constraints, i.e., one which is satisfied by all + /// points. + static FlatAffineConstraints getUniverse(unsigned numDims = 0, + unsigned numSymbols = 0) { + return FlatAffineConstraints(numDims, numSymbols); + } + /// Create a flat affine constraint system from an AffineValueMap or a list of /// these. The constructed system will only include equalities. explicit FlatAffineConstraints(const AffineValueMap &avm); @@ -153,6 +160,10 @@ class FlatAffineConstraints { /// Returns such a point if one exists, or an empty Optional otherwise. Optional<SmallVector<int64_t, 8>> findIntegerSample() const; + /// Returns true if the given point satisfies the constraints, or false + /// otherwise. + bool containsPoint(ArrayRef<int64_t> point) const; + // Clones this object. std::unique_ptr<FlatAffineConstraints> clone() const; diff --git a/mlir/include/mlir/Analysis/Presburger/Simplex.h b/mlir/include/mlir/Analysis/Presburger/Simplex.h index 209382013de2a..05d241e60958c 100644 --- a/mlir/include/mlir/Analysis/Presburger/Simplex.h +++ b/mlir/include/mlir/Analysis/Presburger/Simplex.h @@ -169,6 +169,9 @@ class Simplex { /// Rollback to a snapshot. This invalidates all later snapshots. void rollback(unsigned snapshot); + /// Add all the constraints from the given FlatAffineConstraints. + void intersectFlatAffineConstraints(const FlatAffineConstraints &fac); + /// Compute the maximum or minimum value of the given row, depending on /// direction. The specified row is never pivoted. /// diff --git a/mlir/include/mlir/Analysis/PresburgerSet.h b/mlir/include/mlir/Analysis/PresburgerSet.h new file mode 100644 index 0000000000000..1f3a10a8a6242 --- /dev/null +++ b/mlir/include/mlir/Analysis/PresburgerSet.h @@ -0,0 +1,112 @@ +//===- Set.h - MLIR PresburgerSet Class -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A class to represent unions of FlatAffineConstraints.
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_ANALYSIS_PRESBURGERSET_H +#define MLIR_ANALYSIS_PRESBURGERSET_H + +#include "mlir/Analysis/AffineStructures.h" + +namespace mlir { + +/// This class can represent a union of FlatAffineConstraints, with support for +/// union, intersection, subtraction and complement operations, as well as +/// sampling. +/// +/// The FlatAffineConstraints (FACs) are stored in a vector, and the set +/// represents the union of these FACs. An empty list corresponds to the empty +/// set. +/// +/// Note that there are no invariants guaranteed on the list of FACs other than +/// that they are all in the same space, i.e., they all have the same number of +/// dimensions and symbols. For example, the FACs may overlap each other. +class PresburgerSet { +public: + explicit PresburgerSet(const FlatAffineConstraints &fac); + + /// Return the number of FACs in the union. + unsigned getNumFACs() const; + + /// Return the number of real dimensions. + unsigned getNumDims() const; + + /// Return the number of symbolic dimensions. + unsigned getNumSyms() const; + + /// Return a reference to the list of FlatAffineConstraints. + ArrayRef<FlatAffineConstraints> getAllFlatAffineConstraints() const; + + /// Return the FlatAffineConstraints at the specified index. + const FlatAffineConstraints &getFlatAffineConstraints(unsigned index) const; + + /// Mutate this set, turning it into the union of this set and the given + /// FlatAffineConstraints. + void unionFACInPlace(const FlatAffineConstraints &fac); + + /// Mutate this set, turning it into the union of this set and the given set. + void unionSetInPlace(const PresburgerSet &set); + + /// Return the union of this set and the given set. + PresburgerSet unionSet(const PresburgerSet &set) const; + + /// Return the intersection of this set and the given set. + PresburgerSet intersect(const PresburgerSet &set) const; + + /// Return true if the set contains the given point, or false otherwise. + bool containsPoint(ArrayRef<int64_t> point) const; + + /// Print the set's internal state. + void print(raw_ostream &os) const; + void dump() const; + + /// Return the complement of this set. + PresburgerSet complement() const; + + /// Return the set difference of this set and the given set, i.e., + /// return `this \ set`. + PresburgerSet subtract(const PresburgerSet &set) const; + + /// Return a universe set of the specified type that contains all points. + static PresburgerSet getUniverse(unsigned nDim = 0, unsigned nSym = 0); + /// Return an empty set of the specified type that contains no points. + static PresburgerSet getEmptySet(unsigned nDim = 0, unsigned nSym = 0); + + /// Return true if all the sets in the union are known to be integer empty, + /// false otherwise. + bool isIntegerEmpty() const; + + /// Find an integer sample from the given set. This should not be called if + /// any of the FACs in the union are unbounded. + bool findIntegerSample(SmallVectorImpl<int64_t> &sample); + +private: + /// Construct an empty PresburgerSet. + PresburgerSet(unsigned nDim = 0, unsigned nSym = 0) + : nDim(nDim), nSym(nSym) {} + + /// Return the set difference fac \ set. + static PresburgerSet getSetDifference(FlatAffineConstraints fac, + const PresburgerSet &set); + + /// Number of identifiers corresponding to real dimensions. + unsigned nDim; + + /// Number of symbolic dimensions, unknown but constant for analysis, as in + /// FlatAffineConstraints.
+ unsigned nSym; + + /// The list of flatAffineConstraints that this set is the union of. + SmallVector<FlatAffineConstraints, 2> flatAffineConstraints; +}; + +} // namespace mlir + +#endif // MLIR_ANALYSIS_PRESBURGERSET_H diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index 5b7f4d4982d02..341dde523e8bd 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -1056,6 +1056,33 @@ FlatAffineConstraints::findIntegerSample() const { return Simplex(*this).findIntegerSample(); } +/// Helper to evaluate an affine expression at a point. +/// The expression is a list of coefficients for the dimensions followed by the +/// constant term. +static int64_t valueAt(ArrayRef<int64_t> expr, ArrayRef<int64_t> point) { + assert(expr.size() == 1 + point.size() && + "Dimensionalities of point and expression don't match!"); + int64_t value = expr.back(); + for (unsigned i = 0; i < point.size(); ++i) + value += expr[i] * point[i]; + return value; +} + +/// A point satisfies an equality iff the value of the equality at that +/// point is zero, and it satisfies an inequality iff the value of the +/// inequality at that point is non-negative. +bool FlatAffineConstraints::containsPoint(ArrayRef<int64_t> point) const { + for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) { + if (valueAt(getEquality(i), point) != 0) + return false; + } + for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) { + if (valueAt(getInequality(i), point) < 0) + return false; + } + return true; +} + /// Tightens inequalities given that we are dealing with integer spaces. This is /// analogous to the GCD test but applied to inequalities. The constant term can /// be reduced to the preceding multiple of the GCD of the coefficients, i.e., diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt index 217a94995c0ab..4e334c94bd83f 100644 --- a/mlir/lib/Analysis/CMakeLists.txt +++ b/mlir/lib/Analysis/CMakeLists.txt @@ -5,6 +5,7 @@ set(LLVM_OPTIONAL_SOURCES Liveness.cpp LoopAnalysis.cpp NestedMatcher.cpp + PresburgerSet.cpp SliceAnalysis.cpp Utils.cpp ) @@ -25,7 +26,6 @@ add_mlir_library(MLIRAnalysis MLIRCallInterfaces MLIRControlFlowInterfaces MLIRInferTypeOpInterface - MLIRPresburger MLIRSCF ) @@ -34,6 +34,7 @@ add_mlir_library(MLIRLoopAnalysis AffineStructures.cpp LoopAnalysis.cpp NestedMatcher.cpp + PresburgerSet.cpp Utils.cpp ADDITIONAL_HEADER_DIRS @@ -51,4 +52,4 @@ add_mlir_library(MLIRLoopAnalysis MLIRSCF ) -add_subdirectory(Presburger) +add_subdirectory(Presburger) \ No newline at end of file diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt index 2561013696d9b..49cdd5ac14312 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -1,4 +1,4 @@ add_mlir_library(MLIRPresburger Simplex.cpp Matrix.cpp - ) + ) \ No newline at end of file diff --git a/mlir/lib/Analysis/Presburger/Simplex.cpp b/mlir/lib/Analysis/Presburger/Simplex.cpp index db1e48f50e8e9..65a8a689164fd 100644 --- a/mlir/lib/Analysis/Presburger/Simplex.cpp +++ b/mlir/lib/Analysis/Presburger/Simplex.cpp @@ -451,6 +451,16 @@ void Simplex::rollback(unsigned snapshot) { } } +/// Add all the constraints from the given FlatAffineConstraints.
+void Simplex::intersectFlatAffineConstraints(const FlatAffineConstraints &fac) { + assert(fac.getNumIds() == numVariables() && + "FlatAffineConstraints must have same dimensionality as simplex"); + for (unsigned i = 0, e = fac.getNumInequalities(); i < e; ++i) + addInequality(fac.getInequality(i)); + for (unsigned i = 0, e = fac.getNumEqualities(); i < e; ++i) + addEquality(fac.getEquality(i)); +} + Optional<Fraction> Simplex::computeRowOptimum(Direction direction, unsigned row) { // Keep trying to find a pivot for the row in the specified direction. diff --git a/mlir/lib/Analysis/PresburgerSet.cpp b/mlir/lib/Analysis/PresburgerSet.cpp new file mode 100644 index 0000000000000..323dc3e56d549 --- /dev/null +++ b/mlir/lib/Analysis/PresburgerSet.cpp @@ -0,0 +1,316 @@ +//===- Set.cpp - MLIR PresburgerSet Class ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/PresburgerSet.h" +#include "mlir/Analysis/Presburger/Simplex.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" + +using namespace mlir; + +PresburgerSet::PresburgerSet(const FlatAffineConstraints &fac) + : nDim(fac.getNumDimIds()), nSym(fac.getNumSymbolIds()) { + unionFACInPlace(fac); +} + +unsigned PresburgerSet::getNumFACs() const { + return flatAffineConstraints.size(); +} + +unsigned PresburgerSet::getNumDims() const { return nDim; } + +unsigned PresburgerSet::getNumSyms() const { return nSym; } + +ArrayRef<FlatAffineConstraints> +PresburgerSet::getAllFlatAffineConstraints() const { + return flatAffineConstraints; +} + +const FlatAffineConstraints & +PresburgerSet::getFlatAffineConstraints(unsigned index) const { + assert(index < flatAffineConstraints.size() && "index out of bounds!"); + return flatAffineConstraints[index]; +} + +/// Assert that the FlatAffineConstraints and PresburgerSet live in +/// compatible spaces. +static void assertDimensionsCompatible(const FlatAffineConstraints &fac, + const PresburgerSet &set) { + assert(fac.getNumDimIds() == set.getNumDims() && + "Number of dimensions of the FlatAffineConstraints and PresburgerSet " + "do not match!"); + assert(fac.getNumSymbolIds() == set.getNumSyms() && + "Number of symbols of the FlatAffineConstraints and PresburgerSet " + "do not match!"); +} + +/// Assert that the two PresburgerSets live in compatible spaces. +static void assertDimensionsCompatible(const PresburgerSet &setA, + const PresburgerSet &setB) { + assert(setA.getNumDims() == setB.getNumDims() && + "Number of dimensions of the PresburgerSets do not match!"); + assert(setA.getNumSyms() == setB.getNumSyms() && + "Number of symbols of the PresburgerSets do not match!"); +} + +/// Mutate this set, turning it into the union of this set and the given +/// FlatAffineConstraints. +void PresburgerSet::unionFACInPlace(const FlatAffineConstraints &fac) { + assertDimensionsCompatible(fac, *this); + flatAffineConstraints.push_back(fac); +} + +/// Mutate this set, turning it into the union of this set and the given set. +/// +/// This is accomplished by simply adding all the FACs of the given set to this +/// set.
+void PresburgerSet::unionSetInPlace(const PresburgerSet &set) { + assertDimensionsCompatible(set, *this); + for (const FlatAffineConstraints &fac : set.flatAffineConstraints) + unionFACInPlace(fac); +} + +/// Return the union of this set and the given set. +PresburgerSet PresburgerSet::unionSet(const PresburgerSet &set) const { + assertDimensionsCompatible(set, *this); + PresburgerSet result = *this; + result.unionSetInPlace(set); + return result; +} + +/// A point is contained in the union iff any of the parts contain the point. +bool PresburgerSet::containsPoint(ArrayRef<int64_t> point) const { + for (const FlatAffineConstraints &fac : flatAffineConstraints) { + if (fac.containsPoint(point)) + return true; + } + return false; +} + +PresburgerSet PresburgerSet::getUniverse(unsigned nDim, unsigned nSym) { + PresburgerSet result(nDim, nSym); + result.unionFACInPlace(FlatAffineConstraints::getUniverse(nDim, nSym)); + return result; +} + +PresburgerSet PresburgerSet::getEmptySet(unsigned nDim, unsigned nSym) { + return PresburgerSet(nDim, nSym); +} + +// Return the intersection of this set with the given set. +// +// We directly compute (S_1 or S_2 ...) and (T_1 or T_2 ...) +// as (S_1 and T_1) or (S_1 and T_2) or ... +PresburgerSet PresburgerSet::intersect(const PresburgerSet &set) const { + assertDimensionsCompatible(set, *this); + + PresburgerSet result(nDim, nSym); + for (const FlatAffineConstraints &csA : flatAffineConstraints) { + for (const FlatAffineConstraints &csB : set.flatAffineConstraints) { + FlatAffineConstraints intersection(csA); + intersection.append(csB); + if (!intersection.isEmpty()) + result.unionFACInPlace(std::move(intersection)); + } + } + return result; +} + +/// Return `coeffs` with all the elements negated. +static SmallVector<int64_t, 8> getNegatedCoeffs(ArrayRef<int64_t> coeffs) { + SmallVector<int64_t, 8> negatedCoeffs; + negatedCoeffs.reserve(coeffs.size()); + for (int64_t coeff : coeffs) + negatedCoeffs.emplace_back(-coeff); + return negatedCoeffs; +} + +/// Return the complement of the given inequality. +/// +/// The complement of a_1 x_1 + ... + a_n x_n + c >= 0 is +/// a_1 x_1 + ... + a_n x_n + c < 0, i.e., -a_1 x_1 - ... - a_n x_n - c - 1 >= 0. +static SmallVector<int64_t, 8> getComplementIneq(ArrayRef<int64_t> ineq) { + SmallVector<int64_t, 8> coeffs; + coeffs.reserve(ineq.size()); + for (int64_t coeff : ineq) + coeffs.emplace_back(-coeff); + --coeffs.back(); + return coeffs; +} + +/// Return the set difference b \ s and accumulate the result into `result`. +/// `simplex` must correspond to b. +/// +/// In the following, V denotes union, ^ denotes intersection, \ denotes set +/// difference and ~ denotes complement. +/// Let b be the FlatAffineConstraints and s = (V_i s_i) be the set. We want +/// b \ (V_i s_i). +/// +/// Let s_i = ^_j s_ij, where each s_ij is a single inequality. To compute +/// b \ s_i = b ^ ~s_i, we partition s_i based on the first violated inequality: +/// ~s_i = (~s_i1) V (s_i1 ^ ~s_i2) V (s_i1 ^ s_i2 ^ ~s_i3) V ... +/// And the required result is (b ^ ~s_i1) V (b ^ s_i1 ^ ~s_i2) V ... +/// We recurse by subtracting V_{j > i} S_j from each of these parts and +/// returning the union of the results. Each equality is handled as a +/// conjunction of two inequalities. +/// +/// As a heuristic, we try adding all the constraints and check if simplex +/// says that the intersection is empty. Also, in the process we find out that +/// some constraints are redundant. These redundant constraints are ignored.
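+/// For a concrete instance of the partition, take s_i = {x >= 4 and x <= 5}, +/// i.e., s_i1: x - 4 >= 0 and s_i2: -x + 5 >= 0. Over the integers, +/// ~s_i1 = {x <= 3} and s_i1 ^ ~s_i2 = {x >= 4 and x >= 6} = {x >= 6}, so +/// b \ s_i = (b ^ {x <= 3}) V (b ^ {x >= 6}).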
+static void subtractRecursively(FlatAffineConstraints &b, Simplex &simplex, + const PresburgerSet &s, unsigned i, + PresburgerSet &result) { + if (i == s.getNumFACs()) { + result.unionFACInPlace(b); + return; + } + const FlatAffineConstraints &sI = s.getFlatAffineConstraints(i); + unsigned initialSnapshot = simplex.getSnapshot(); + unsigned offset = simplex.numConstraints(); + simplex.intersectFlatAffineConstraints(sI); + + if (simplex.isEmpty()) { + /// b ^ s_i is empty, so b \ s_i = b. We move directly to i + 1. + simplex.rollback(initialSnapshot); + subtractRecursively(b, simplex, s, i + 1, result); + return; + } + + simplex.detectRedundant(); + llvm::SmallBitVector isMarkedRedundant; + for (unsigned j = 0; j < 2 * sI.getNumEqualities() + sI.getNumInequalities(); j++) + isMarkedRedundant.push_back(simplex.isMarkedRedundant(offset + j)); + + simplex.rollback(initialSnapshot); + + // Recurse with the part b ^ ~ineq. Note that b is modified throughout + // subtractRecursively. At the time this function is called, the current b is + // actually equal to b ^ s_i1 ^ s_i2 ^ ... ^ s_ij, and ineq is the next + // inequality, s_{i,j+1}. This function recurses into the next level i + 1 + // with the part b ^ s_i1 ^ s_i2 ^ ... ^ s_ij ^ ~s_{i,j+1}. + auto recurseWithInequality = [&, i](ArrayRef<int64_t> ineq) { + size_t snapshot = simplex.getSnapshot(); + b.addInequality(ineq); + simplex.addInequality(ineq); + subtractRecursively(b, simplex, s, i + 1, result); + b.removeInequality(b.getNumInequalities() - 1); + simplex.rollback(snapshot); + }; + + // For each inequality ineq, we first recurse with the part where ineq + // is not satisfied, and then add the ineq to b and simplex because + // ineq must be satisfied by all later parts. + auto processInequality = [&](ArrayRef<int64_t> ineq) { + recurseWithInequality(getComplementIneq(ineq)); + b.addInequality(ineq); + simplex.addInequality(ineq); + }; + + // processInequality appends some additional constraints to b. We want to + // rollback b to its initial state before returning, which we will do by + // removing all constraints beyond the original number of inequalities + // and equalities, so we store these counts first. + unsigned originalNumIneqs = b.getNumInequalities(); + unsigned originalNumEqs = b.getNumEqualities(); + + for (unsigned j = 0, e = sI.getNumInequalities(); j < e; j++) { + if (isMarkedRedundant[j]) + continue; + processInequality(sI.getInequality(j)); + } + + offset = sI.getNumInequalities(); + for (unsigned j = 0, e = sI.getNumEqualities(); j < e; ++j) { + const ArrayRef<int64_t> &coeffs = sI.getEquality(j); + // Same as the above loop for inequalities, done once each for the positive + // and negative inequalities that make up this equality. + if (!isMarkedRedundant[offset + 2 * j]) + processInequality(coeffs); + if (!isMarkedRedundant[offset + 2 * j + 1]) + processInequality(getNegatedCoeffs(coeffs)); + } + + // Rollback b and simplex to their initial states. + for (unsigned i = b.getNumInequalities(); i > originalNumIneqs; --i) + b.removeInequality(i - 1); + + for (unsigned i = b.getNumEqualities(); i > originalNumEqs; --i) + b.removeEquality(i - 1); + + simplex.rollback(initialSnapshot); +} + +/// Return the set difference fac \ set. +/// +/// The FAC here is modified in subtractRecursively, so it cannot be a const +/// reference even though it is restored to its original state before returning +/// from that function.
+PresburgerSet PresburgerSet::getSetDifference(FlatAffineConstraints fac, + const PresburgerSet &set) { + assertDimensionsCompatible(fac, set); + if (fac.isEmptyByGCDTest()) + return PresburgerSet::getEmptySet(fac.getNumDimIds(), + fac.getNumSymbolIds()); + + PresburgerSet result(fac.getNumDimIds(), fac.getNumSymbolIds()); + Simplex simplex(fac); + subtractRecursively(fac, simplex, set, 0, result); + return result; +} + +/// Return the complement of this set. +PresburgerSet PresburgerSet::complement() const { + return getSetDifference( + FlatAffineConstraints::getUniverse(getNumDims(), getNumSyms()), *this); +} + +/// Return the result of subtracting the given set from this set, i.e., +/// return `this \ set`. +PresburgerSet PresburgerSet::subtract(const PresburgerSet &set) const { + assertDimensionsCompatible(set, *this); + PresburgerSet result(nDim, nSym); + // We compute (V_i fac_i) \ set as V_i (fac_i \ set). + for (const FlatAffineConstraints &fac : flatAffineConstraints) + result.unionSetInPlace(getSetDifference(fac, set)); + return result; +} + +/// Return true if all the sets in the union are known to be integer empty, +/// false otherwise. +bool PresburgerSet::isIntegerEmpty() const { + assert(nSym == 0 && "isIntegerEmpty is intended for non-symbolic sets"); + // The set is empty iff all of the disjuncts are empty. + for (const FlatAffineConstraints &fac : flatAffineConstraints) { + if (!fac.isIntegerEmpty()) + return false; + } + return true; +} + +bool PresburgerSet::findIntegerSample(SmallVectorImpl<int64_t> &sample) { + assert(nSym == 0 && "findIntegerSample is intended for non-symbolic sets"); + // A sample exists iff any of the disjuncts contains a sample. + for (const FlatAffineConstraints &fac : flatAffineConstraints) { + if (Optional<SmallVector<int64_t, 8>> opt = fac.findIntegerSample()) { + sample = std::move(*opt); + return true; + } + } + return false; +} + +void PresburgerSet::print(raw_ostream &os) const { + os << getNumFACs() << " FlatAffineConstraints:\n"; + for (const FlatAffineConstraints &fac : flatAffineConstraints) { + fac.print(os); + os << '\n'; + } +} + +void PresburgerSet::dump() const { print(llvm::errs()); } diff --git a/mlir/unittests/Analysis/AffineStructuresTest.cpp b/mlir/unittests/Analysis/AffineStructuresTest.cpp index bf47f4c302a75..6fcb1c489cfcb 100644 --- a/mlir/unittests/Analysis/AffineStructuresTest.cpp +++ b/mlir/unittests/Analysis/AffineStructuresTest.cpp @@ -15,22 +15,11 @@ namespace mlir { -/// Evaluate the value of the given affine expression at the specified point. -/// The expression is a list of coefficients for the dimensions followed by the -/// constant term. -int64_t valueAt(ArrayRef<int64_t> expr, ArrayRef<int64_t> point) { - assert(expr.size() == 1 + point.size()); - int64_t value = expr.back(); - for (unsigned i = 0; i < point.size(); ++i) - value += expr[i] * point[i]; - return value; -} - /// If 'hasValue' is true, check that findIntegerSample returns a valid sample /// for the FlatAffineConstraints fac. /// /// If hasValue is false, check that findIntegerSample returns None.
-void checkSample(bool hasValue, const FlatAffineConstraints &fac) { +static void checkSample(bool hasValue, const FlatAffineConstraints &fac) { Optional<SmallVector<int64_t, 8>> maybeSample = fac.findIntegerSample(); if (!hasValue) { EXPECT_FALSE(maybeSample.hasValue()); @@ -41,16 +30,13 @@ void checkSample(bool hasValue, const FlatAffineConstraints &fac) { } } else { ASSERT_TRUE(maybeSample.hasValue()); - for (unsigned i = 0; i < fac.getNumEqualities(); ++i) - EXPECT_EQ(valueAt(fac.getEquality(i), *maybeSample), 0); - for (unsigned i = 0; i < fac.getNumInequalities(); ++i) - EXPECT_GE(valueAt(fac.getInequality(i), *maybeSample), 0); + EXPECT_TRUE(fac.containsPoint(*maybeSample)); } } /// Construct a FlatAffineConstraints from a set of inequality and /// equality constraints. -FlatAffineConstraints +static FlatAffineConstraints makeFACFromConstraints(unsigned dims, ArrayRef<SmallVector<int64_t, 4>> ineqs, ArrayRef<SmallVector<int64_t, 4>> eqs) { FlatAffineConstraints fac(ineqs.size(), eqs.size(), dims + 1, dims); @@ -66,9 +52,9 @@ makeFACFromConstraints(unsigned dims, ArrayRef<SmallVector<int64_t, 4>> ineqs, /// orderings may cause the algorithm to proceed differently. At least some of /// these permutations should make it past the heuristics and test the /// implementation of the GBR algorithm itself. -void checkPermutationsSample(bool hasValue, unsigned nDim, - ArrayRef<SmallVector<int64_t, 4>> ineqs, - ArrayRef<SmallVector<int64_t, 4>> eqs) { +static void checkPermutationsSample(bool hasValue, unsigned nDim, + ArrayRef<SmallVector<int64_t, 4>> ineqs, + ArrayRef<SmallVector<int64_t, 4>> eqs) { SmallVector<unsigned, 4> perm(nDim); std::iota(perm.begin(), perm.end(), 0); auto permute = [&perm](ArrayRef<int64_t> coeffs) { diff --git a/mlir/unittests/Analysis/CMakeLists.txt b/mlir/unittests/Analysis/CMakeLists.txt index 16d084dc452f7..6317aeb8df892 100644 --- a/mlir/unittests/Analysis/CMakeLists.txt +++ b/mlir/unittests/Analysis/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_unittest(MLIRAnalysisTests AffineStructuresTest.cpp + PresburgerSetTest.cpp ) target_link_libraries(MLIRAnalysisTests diff --git a/mlir/unittests/Analysis/Presburger/CMakeLists.txt b/mlir/unittests/Analysis/Presburger/CMakeLists.txt index 0cfda9b0c8aa4..5dd69edfad089 100644 --- a/mlir/unittests/Analysis/Presburger/CMakeLists.txt +++ b/mlir/unittests/Analysis/Presburger/CMakeLists.txt @@ -5,3 +5,4 @@ add_mlir_unittest(MLIRPresburgerTests target_link_libraries(MLIRPresburgerTests PRIVATE MLIRPresburger) + diff --git a/mlir/unittests/Analysis/PresburgerSetTest.cpp b/mlir/unittests/Analysis/PresburgerSetTest.cpp new file mode 100644 index 0000000000000..99a0e86222324 --- /dev/null +++ b/mlir/unittests/Analysis/PresburgerSetTest.cpp @@ -0,0 +1,524 @@ +//===- SetTest.cpp - Tests for PresburgerSet ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains tests for PresburgerSet. Each test works by computing +// an operation (union, intersection, subtract, or complement) on two sets +// and checking, for a set of points, that the resulting set contains the point +// iff the result is supposed to contain it. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/PresburgerSet.h" + +#include +#include + +namespace mlir { + +/// Compute the union of s and t, and check that each of the given points +/// belongs to the union iff it belongs to at least one of s and t.
+static void testUnionAtPoints(PresburgerSet s, PresburgerSet t,
+                              ArrayRef<SmallVector<int64_t, 4>> points) {
+  PresburgerSet unionSet = s.unionSet(t);
+  for (const SmallVector<int64_t, 4> &point : points) {
+    bool inS = s.containsPoint(point);
+    bool inT = t.containsPoint(point);
+    bool inUnion = unionSet.containsPoint(point);
+    EXPECT_EQ(inUnion, inS || inT);
+  }
+}
+
+/// Compute the intersection of s and t, and check that each of the given
+/// points belongs to the intersection iff it belongs to both s and t.
+static void testIntersectAtPoints(PresburgerSet s, PresburgerSet t,
+                                  ArrayRef<SmallVector<int64_t, 4>> points) {
+  PresburgerSet intersection = s.intersect(t);
+  for (const SmallVector<int64_t, 4> &point : points) {
+    bool inS = s.containsPoint(point);
+    bool inT = t.containsPoint(point);
+    bool inIntersection = intersection.containsPoint(point);
+    EXPECT_EQ(inIntersection, inS && inT);
+  }
+}
+
+/// Compute the set difference s \ t, and check that each of the given points
+/// belongs to the difference iff it belongs to s and does not belong to t.
+static void testSubtractAtPoints(PresburgerSet s, PresburgerSet t,
+                                 ArrayRef<SmallVector<int64_t, 4>> points) {
+  PresburgerSet diff = s.subtract(t);
+  for (const SmallVector<int64_t, 4> &point : points) {
+    bool inS = s.containsPoint(point);
+    bool inT = t.containsPoint(point);
+    bool inDiff = diff.containsPoint(point);
+    if (inT)
+      EXPECT_FALSE(inDiff);
+    else
+      EXPECT_EQ(inDiff, inS);
+  }
+}
+
+/// Compute the complement of s, and check that each of the given points
+/// belongs to the complement iff it does not belong to s.
+static void testComplementAtPoints(PresburgerSet s,
+                                   ArrayRef<SmallVector<int64_t, 4>> points) {
+  PresburgerSet complement = s.complement();
+  for (const SmallVector<int64_t, 4> &point : points) {
+    bool inS = s.containsPoint(point);
+    bool inComplement = complement.containsPoint(point);
+    if (inS)
+      EXPECT_FALSE(inComplement);
+    else
+      EXPECT_TRUE(inComplement);
+  }
+}
+
+/// Construct a FlatAffineConstraints from a set of inequality and
+/// equality constraints.
+static FlatAffineConstraints
+makeFACFromConstraints(unsigned dims, ArrayRef<SmallVector<int64_t, 4>> ineqs,
+                       ArrayRef<SmallVector<int64_t, 4>> eqs) {
+  FlatAffineConstraints fac(ineqs.size(), eqs.size(), dims + 1, dims);
+  for (const SmallVector<int64_t, 4> &eq : eqs)
+    fac.addEquality(eq);
+  for (const SmallVector<int64_t, 4> &ineq : ineqs)
+    fac.addInequality(ineq);
+  return fac;
+}
+
+static FlatAffineConstraints
+makeFACFromIneqs(unsigned dims, ArrayRef<SmallVector<int64_t, 4>> ineqs) {
+  return makeFACFromConstraints(dims, ineqs, {});
+}
+
+static PresburgerSet makeSetFromFACs(unsigned dims,
+                                     ArrayRef<FlatAffineConstraints> facs) {
+  PresburgerSet set = PresburgerSet::getEmptySet(dims);
+  for (const FlatAffineConstraints &fac : facs)
+    set.unionFACInPlace(fac);
+  return set;
+}
+
+TEST(SetTest, containsPoint) {
+  PresburgerSet setA =
+      makeSetFromFACs(1, {
+                             makeFACFromIneqs(1, {{1, -2},    // x >= 2.
+                                                  {-1, 8}}),  // x <= 8.
+                             makeFACFromIneqs(1, {{1, -10},   // x >= 10.
+                                                  {-1, 20}}), // x <= 20.
+                         });
+  for (unsigned x = 0; x <= 21; ++x) {
+    if ((2 <= x && x <= 8) || (10 <= x && x <= 20))
+      EXPECT_TRUE(setA.containsPoint({x}));
+    else
+      EXPECT_FALSE(setA.containsPoint({x}));
+  }
+
+  // A parallelogram with vertices {(3, 1), (10, -6), (24, 8), (17, 15)} union
+  // a square with opposite corners (2, 2) and (10, 10).
+  PresburgerSet setB =
+      makeSetFromFACs(2, {makeFACFromIneqs(2,
+                                           {
+                                               {1, 1, -4},   // x + y >= 4.
+                                               {-1, -1, 32}, // x + y <= 32.
+                                               {1, -1, -2},  // x - y >= 2.
+                                               {-1, 1, 16},  // x - y <= 16.
+                                           }),
+                          makeFACFromIneqs(2, {
+                                                  {1, 0, -2},  // x >= 2.
+                                                  {0, 1, -2},  // y >= 2.
+                                                  {-1, 0, 10}, // x <= 10.
+                                                  {0, -1, 10}  // y <= 10.
+                                              })});
+
+  for (int x = 1; x <= 25; ++x) {
+    for (int y = -6; y <= 16; ++y) {
+      if (4 <= x + y && x + y <= 32 && 2 <= x - y && x - y <= 16)
+        EXPECT_TRUE(setB.containsPoint({x, y}));
+      else if (2 <= x && x <= 10 && 2 <= y && y <= 10)
+        EXPECT_TRUE(setB.containsPoint({x, y}));
+      else
+        EXPECT_FALSE(setB.containsPoint({x, y}));
+    }
+  }
+}
+
+TEST(SetTest, Union) {
+  PresburgerSet set =
+      makeSetFromFACs(1, {
+                             makeFACFromIneqs(1, {{1, -2},    // x >= 2.
+                                                  {-1, 8}}),  // x <= 8.
+                             makeFACFromIneqs(1, {{1, -10},   // x >= 10.
+                                                  {-1, 20}}), // x <= 20.
+                         });
+
+  // Universe union set.
+  testUnionAtPoints(PresburgerSet::getUniverse(1), set,
+                    {{1}, {2}, {8}, {9}, {10}, {20}, {21}});
+
+  // Empty set union set.
+  testUnionAtPoints(PresburgerSet::getEmptySet(1), set,
+                    {{1}, {2}, {8}, {9}, {10}, {20}, {21}});
+
+  // Empty set union universe.
+  testUnionAtPoints(PresburgerSet::getEmptySet(1),
+                    PresburgerSet::getUniverse(1), {{1}, {2}, {0}, {-1}});
+
+  // Universe union empty set.
+  testUnionAtPoints(PresburgerSet::getUniverse(1),
+                    PresburgerSet::getEmptySet(1), {{1}, {2}, {0}, {-1}});
+
+  // Empty set union empty set.
+  testUnionAtPoints(PresburgerSet::getEmptySet(1),
+                    PresburgerSet::getEmptySet(1), {{1}, {2}, {0}, {-1}});
+}
+
+TEST(SetTest, Intersect) {
+  PresburgerSet set =
+      makeSetFromFACs(1, {
+                             makeFACFromIneqs(1, {{1, -2},    // x >= 2.
+                                                  {-1, 8}}),  // x <= 8.
+                             makeFACFromIneqs(1, {{1, -10},   // x >= 10.
+                                                  {-1, 20}}), // x <= 20.
+                         });
+
+  // Universe intersection set.
+  testIntersectAtPoints(PresburgerSet::getUniverse(1), set,
+                        {{1}, {2}, {8}, {9}, {10}, {20}, {21}});
+
+  // Empty set intersection set.
+  testIntersectAtPoints(PresburgerSet::getEmptySet(1), set,
+                        {{1}, {2}, {8}, {9}, {10}, {20}, {21}});
+
+  // Empty set intersection universe.
+  testIntersectAtPoints(PresburgerSet::getEmptySet(1),
+                        PresburgerSet::getUniverse(1), {{1}, {2}, {0}, {-1}});
+
+  // Universe intersection empty set.
+  testIntersectAtPoints(PresburgerSet::getUniverse(1),
+                        PresburgerSet::getEmptySet(1), {{1}, {2}, {0}, {-1}});
+
+  // Universe intersection universe.
+  testIntersectAtPoints(PresburgerSet::getUniverse(1),
+                        PresburgerSet::getUniverse(1), {{1}, {2}, {0}, {-1}});
+}
+
+TEST(SetTest, Subtract) {
+  // The universe minus the union of the intervals [2, 8] and [10, 20].
+  testSubtractAtPoints(
+      makeSetFromFACs(1, {makeFACFromIneqs(1, {})}),
+      makeSetFromFACs(1,
+                      {
+                          makeFACFromIneqs(1, {{1, -2},    // x >= 2.
+                                               {-1, 8}}),  // x <= 8.
+                          makeFACFromIneqs(1, {{1, -10},   // x >= 10.
+                                               {-1, 20}}), // x <= 20.
+                      }),
+      {{1}, {2}, {8}, {9}, {10}, {20}, {21}});
+
+  // ((-infinity, 0] U [3, 4] U [6, 7]) - ([2, 3] U [5, 6])
+  testSubtractAtPoints(
+      makeSetFromFACs(1,
+                      {
+                          makeFACFromIneqs(1,
+                                           {
+                                               {-1, 0} // x <= 0.
+                                           }),
+                          makeFACFromIneqs(1,
+                                           {
+                                               {1, -3}, // x >= 3.
+                                               {-1, 4}  // x <= 4.
+                                           }),
+                          makeFACFromIneqs(1,
+                                           {
+                                               {1, -6}, // x >= 6.
+                                               {-1, 7}  // x <= 7.
+                                           }),
+                      }),
+      makeSetFromFACs(1, {makeFACFromIneqs(1,
+                                           {
+                                               {1, -2}, // x >= 2.
+                                               {-1, 3}, // x <= 3.
+                                           }),
+                          makeFACFromIneqs(1,
+                                           {
+                                               {1, -5}, // x >= 5.
+                                               {-1, 6}  // x <= 6.
+                                           })}),
+      {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
+
+  // {(x, y) : x >= y} minus {(x, y) : x >= -y}. The expected result is
+  // {(x, y) : x >= y and x + y <= -1}.
+  testSubtractAtPoints(
+      makeSetFromFACs(2, {makeFACFromIneqs(2,
+                                           {
+                                               {1, -1, 0} // x >= y.
+                                           })}),
+      makeSetFromFACs(2, {makeFACFromIneqs(2,
+                                           {
+                                               {1, 1, 0} // x >= -y.
+                                           })}),
+      {{0, 1}, {1, 1}, {1, 0}, {1, -1}, {0, -1}});
+
+  // A rectangle with corners at (2, 2) and (10, 10), minus
+  // a rectangle with corners at (5, -10) and (7, 100).
+  // This splits the former rectangle into two pieces, one with corners
+  // (2, 2) and (4, 10) and one with corners (8, 2) and (10, 10).
+  testSubtractAtPoints(
+      makeSetFromFACs(2, {makeFACFromIneqs(2,
+                                           {
+                                               {1, 0, -2},  // x >= 2.
+                                               {0, 1, -2},  // y >= 2.
+                                               {-1, 0, 10}, // x <= 10.
+                                               {0, -1, 10}  // y <= 10.
+                                           })}),
+      makeSetFromFACs(2, {makeFACFromIneqs(2,
+                                           {
+                                               {1, 0, -5},   // x >= 5.
+                                               {0, 1, 10},   // y >= -10.
+                                               {-1, 0, 7},   // x <= 7.
+                                               {0, -1, 100}, // y <= 100.
+                                           })}),
+      {{1, 2},  {2, 2},  {4, 2},  {5, 2},  {7, 2},  {8, 2},  {11, 2},
+       {1, 1},  {2, 1},  {4, 1},  {5, 1},  {7, 1},  {8, 1},  {11, 1},
+       {1, 10}, {2, 10}, {4, 10}, {5, 10}, {7, 10}, {8, 10}, {11, 10},
+       {1, 11}, {2, 11}, {4, 11}, {5, 11}, {7, 11}, {8, 11}, {11, 11}});
+
+  // A rectangle with corners at (2, 2) and (10, 10), minus
+  // a rectangle with corners at (5, 4) and (7, 8).
+  // This creates a hole in the middle of the former rectangle, and the
+  // resulting set can be represented as a union of four rectangles.
+  testSubtractAtPoints(
+      makeSetFromFACs(2, {makeFACFromIneqs(2,
+                                           {
+                                               {1, 0, -2},  // x >= 2.
+                                               {0, 1, -2},  // y >= 2.
+                                               {-1, 0, 10}, // x <= 10.
+                                               {0, -1, 10}  // y <= 10.
+                                           })}),
+      makeSetFromFACs(2, {makeFACFromIneqs(2,
+                                           {
+                                               {1, 0, -5}, // x >= 5.
+                                               {0, 1, -4}, // y >= 4.
+                                               {-1, 0, 7}, // x <= 7.
+                                               {0, -1, 8}, // y <= 8.
+                                           })}),
+      {{1, 1},
+       {2, 2},
+       {10, 10},
+       {11, 11},
+       {5, 4},
+       {7, 4},
+       {5, 8},
+       {7, 8},
+       {4, 4},
+       {8, 4},
+       {4, 8},
+       {8, 8}});
+
+  // The second set is a superset of the first one, since on the line x + y = 0,
+  // y <= 1 is equivalent to x >= -1. So the result is empty.
+  testSubtractAtPoints(
+      makeSetFromFACs(2, {makeFACFromConstraints(2,
+                                                 {
+                                                     {1, 0, 0} // x >= 0.
+                                                 },
+                                                 {
+                                                     {1, 1, 0} // x + y = 0.
+                                                 })}),
+      makeSetFromFACs(2, {makeFACFromConstraints(2,
+                                                 {
+                                                     {0, -1, 1} // y <= 1.
+                                                 },
+                                                 {
+                                                     {1, 1, 0} // x + y = 0.
+                                                 })}),
+      {{0, 0},
+       {1, -1},
+       {2, -2},
+       {-1, 1},
+       {-2, 2},
+       {1, 1},
+       {-1, -1},
+       {-1, 1},
+       {1, -1}});
+
+  // The result should be {0} U {2}.
+  testSubtractAtPoints(
+      makeSetFromFACs(1,
+                      {
+                          makeFACFromIneqs(1, {{1, 0},    // x >= 0.
+                                               {-1, 2}}), // x <= 2.
+                      }),
+      makeSetFromFACs(1,
+                      {
+                          makeFACFromConstraints(1, {},
+                                                 {
+                                                     {1, -1} // x = 1.
+                                                 }),
+                      }),
+      {{-1}, {0}, {1}, {2}, {3}});
+
+  // Sets with lots of redundant inequalities to test the redundancy heuristic.
+  // (The heuristic applies to the subtrahend, i.e., the second set, which is
+  // the one being subtracted.)
+
+  // A parallelogram with vertices {(3, 1), (10, -6), (24, 8), (17, 15)} minus
+  // a triangle with vertices {(2, 2), (10, 2), (10, 10)}.
+  testSubtractAtPoints(
+      makeSetFromFACs(2, {makeFACFromIneqs(2,
+                                           {
+                                               {1, 1, -4},   // x + y >= 4.
+                                               {-1, -1, 32}, // x + y <= 32.
+                                               {1, -1, -2},  // x - y >= 2.
+                                               {-1, 1, 16},  // x - y <= 16.
+                                           })}),
+      makeSetFromFACs(
+          2, {makeFACFromIneqs(2,
+                               {
+                                   {1, 0, -2},   // x >= 2. [redundant]
+                                   {0, 1, -2},   // y >= 2.
+                                   {-1, 0, 10},  // x <= 10.
+                                   {0, -1, 10},  // y <= 10. [redundant]
+                                   {1, 1, -2},   // x + y >= 2. [redundant]
+                                   {-1, -1, 30}, // x + y <= 30. [redundant]
+                                   {1, -1, 0},   // x - y >= 0.
+                                   {-1, 1, 10},  // x - y <= 10.
+                               })}),
+      {{1, 2},  {2, 2},   {3, 2},   {4, 2},  {1, 1},   {2, 1},   {3, 1},
+       {4, 1},  {2, 0},   {3, 0},   {4, 0},  {5, 0},   {10, 2},  {11, 2},
+       {10, 1}, {10, 10}, {10, 11}, {10, 9}, {11, 10}, {10, -6}, {11, -6},
+       {24, 8}, {24, 7},  {17, 15}, {16, 15}});
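+
+  // Sanity-check sketch: subtracting a set from itself must leave no integer
+  // points, whichever redundancy heuristics fire. This uses only the helpers
+  // defined above and PresburgerSet::isIntegerEmpty.
+  {
+    PresburgerSet self = makeSetFromFACs(
+        1, {makeFACFromIneqs(1, {{1, -2},     // x >= 2.
+                                 {-1, 8}})}); // x <= 8.
+    EXPECT_TRUE(self.subtract(self).isIntegerEmpty());
+  }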
+
+  // ((-infinity, -5] U [3, 3] U [4, 4] U [5, 5]) - ([-10, -2] U [3, 4] U
+  // [6, 7])
+  testSubtractAtPoints(
+      makeSetFromFACs(1,
+                      {
+                          makeFACFromIneqs(1,
+                                           {
+                                               {-1, -5}, // x <= -5.
+                                           }),
+                          makeFACFromConstraints(1, {},
+                                                 {
+                                                     {1, -3} // x = 3.
+                                                 }),
+                          makeFACFromConstraints(1, {},
+                                                 {
+                                                     {1, -4} // x = 4.
+                                                 }),
+                          makeFACFromConstraints(1, {},
+                                                 {
+                                                     {1, -5} // x = 5.
+                                                 }),
+                      }),
+      makeSetFromFACs(
+          1,
+          {
+              makeFACFromIneqs(1,
+                               {
+                                   {-1, -2}, // x <= -2.
+                                   {1, 10},  // x >= -10.
+                                   {-1, 0},  // x <= 0. [redundant]
+                                   {-1, 10}, // x <= 10. [redundant]
+                                   {1, 100}, // x >= -100. [redundant]
+                                   {1, 50}   // x >= -50. [redundant]
+                               }),
+              makeFACFromIneqs(1,
+                               {
+                                   {1, -3}, // x >= 3.
+                                   {-1, 4}, // x <= 4.
+                                   {1, 1},  // x >= -1. [redundant]
+                                   {1, 7},  // x >= -7. [redundant]
+                                   {-1, 10} // x <= 10. [redundant]
+                               }),
+              makeFACFromIneqs(1,
+                               {
+                                   {1, -6},  // x >= 6.
+                                   {-1, 7},  // x <= 7.
+                                   {1, 1},   // x >= -1. [redundant]
+                                   {1, -3},  // x >= 3. [redundant]
+                                   {-1, 10}  // x <= 10. [redundant]
+                               }),
+          }),
+      {{-6},
+       {-5},
+       {-4},
+       {-9},
+       {-10},
+       {-11},
+       {0},
+       {1},
+       {2},
+       {3},
+       {4},
+       {5},
+       {6},
+       {7},
+       {8}});
+}
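+
+// A sample-based sanity sketch (it assumes only findIntegerSample and
+// containsPoint from PresburgerSet.h and the helpers defined above): any
+// sample found in a set must be contained in that set.
+TEST(SetTest, FindSampleSketch) {
+  PresburgerSet set = makeSetFromFACs(
+      1, {makeFACFromIneqs(1, {{1, -2},     // x >= 2.
+                               {-1, 8}})}); // x <= 8.
+  SmallVector<int64_t, 8> sample;
+  ASSERT_TRUE(set.findIntegerSample(sample));
+  EXPECT_TRUE(set.containsPoint(sample));
+}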
+
+TEST(SetTest, Complement) {
+  // Complement of the universe.
+  testComplementAtPoints(
+      PresburgerSet::getUniverse(1),
+      {{-1}, {-2}, {-8}, {1}, {2}, {8}, {9}, {10}, {20}, {21}});
+
+  // Complement of the empty set.
+  testComplementAtPoints(
+      PresburgerSet::getEmptySet(1),
+      {{-1}, {-2}, {-8}, {1}, {2}, {8}, {9}, {10}, {20}, {21}});
+
+  testComplementAtPoints(
+      makeSetFromFACs(2, {makeFACFromIneqs(2,
+                                           {
+                                               {1, 0, -2},  // x >= 2.
+                                               {0, 1, -2},  // y >= 2.
+                                               {-1, 0, 10}, // x <= 10.
+                                               {0, -1, 10}  // y <= 10.
+                                           })}),
+      {{1, 1},
+       {2, 1},
+       {1, 2},
+       {2, 2},
+       {2, 3},
+       {3, 2},
+       {10, 10},
+       {10, 11},
+       {11, 10},
+       {2, 10},
+       {2, 11},
+       {1, 10}});
+}
+
+} // namespace mlir

From 9908ee5670596db4fdc2bd7ea7c3071c0e02a784 Mon Sep 17 00:00:00 2001
From: Fanbo Meng
Date: Wed, 7 Oct 2020 11:33:19 -0400
Subject: [PATCH 235/321] [SystemZ][z/OS] Add test of zero length bitfield type
 size larger than target zero length bitfield boundary

Reviewed By: hubert.reinterpretcast

Differential Revision: https://reviews.llvm.org/D88963
---
 clang/test/CodeGen/zos-alignment.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/clang/test/CodeGen/zos-alignment.c b/clang/test/CodeGen/zos-alignment.c
index 9d7bfe8923d0c..4b572fcac5a97 100644
--- a/clang/test/CodeGen/zos-alignment.c
+++ b/clang/test/CodeGen/zos-alignment.c
@@ -90,6 +90,17 @@ struct s10 {
 // CHECK-NEXT:      0 | unsigned int a
 // CHECK-NEXT:        | [sizeof=16, align=16]

+struct s11 {
+  char a;
+  long :0;
+  char b;
+} S11;
+// CHECK: 0 | struct s11
+// CHECK-NEXT:    0 | char a
+// CHECK-NEXT:  8:- | long
+// CHECK-NEXT:    8 | char b
+// CHECK-NEXT:      | [sizeof=16, align=8]
+
 union u0 {
   unsigned short d1 __attribute__((packed));
   int d2:10;

From ddf1864ace484035e3cde5e83b3a31ac81e059c6 Mon Sep 17 00:00:00 2001
From: Yonghong Song
Date: Thu, 6 Aug 2020 09:06:43 -0700
Subject: [PATCH 236/321] BPF: add AdjustOpt IR pass to generate verifier
 friendly codes

Add an IR phase right before the main module optimization. It modifies the
IR to restrict certain later optimizations in order to generate
verifier-friendly code:
 > prevent certain instcombine optimizations, handling both in-block and
   cross-block instcombines.
 > avoid speculative code motion if the variable used in the condition is
   also used in later blocks.

Internally, a bpf IR builtin
  result = __builtin_bpf_passthrough(seq_num, result)
is used to enforce ordering. This builtin is only used during
target-independent IR optimizations and is removed at the beginning of
target-dependent IR optimizations.

For example, after removing the following workaround,
 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
 +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
 @@ -47,7 +47,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
         /* a workaround to prevent compiler from generating
          * codes verifier cannot handle yet.
          */
 -       volatile int ret;
 +       int ret;

this patch generates code that passes the verifier.
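For reference, a sketch of how such a pass-through barrier is materialized
(includes elided). It mirrors the BPFCoreSharedInfo::insertPassThrough helper
that the new pass calls; the llvm.bpf.passthrough intrinsic signature — an i32
seq_num plus an overloaded value operand — is assumed from its use here, so
treat the snippet as illustrative rather than authoritative:

// Build "%res = call ty @llvm.bpf.passthrough(i32 seq_num, ty %input)"
// right before the instruction that consumes %input.
static CallInst *buildPassThrough(Module *M, BasicBlock *BB,
                                  Instruction *Input, Instruction *Before) {
  static uint32_t SeqNum = 0; // Unique id: keeps separate barriers distinct.
  Function *Fn = Intrinsic::getDeclaration(
      M, Intrinsic::bpf_passthrough, {Input->getType(), Input->getType()});
  Constant *SeqNumVal =
      ConstantInt::get(Type::getInt32Ty(BB->getContext()), SeqNum++);
  return CallInst::Create(Fn, {SeqNumVal, Input}, "", Before);
}

Because the call is opaque to target-independent passes, instcombine will not
fold values across it, and the distinct seq_num keeps two barriers from being
treated as interchangeable.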
To disable these optimizations, users can run the "opt" command directly,
as below:
  clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes test.c
  // disable icmp serialization
  opt -O2 -bpf-disable-serialize-icmp test.ll | llvm-dis > t.ll
  // disable avoid-speculation
  opt -O2 -bpf-disable-avoid-speculation test.ll | llvm-dis > t.ll
  llc t.ll

Differential Revision: https://reviews.llvm.org/D85570
---
 llvm/lib/Target/BPF/BPF.h                     |   2 +
 llvm/lib/Target/BPF/BPFAdjustOpt.cpp          | 310 ++++++++++++++++++
 llvm/lib/Target/BPF/BPFTargetMachine.cpp      |   6 +
 llvm/lib/Target/BPF/CMakeLists.txt            |   1 +
 llvm/test/CodeGen/BPF/adjust-opt-icmp1.ll     |  90 +++++
 llvm/test/CodeGen/BPF/adjust-opt-icmp2.ll     |  97 ++++++
 .../CodeGen/BPF/adjust-opt-speculative1.ll    |  84 +++++
 .../CodeGen/BPF/adjust-opt-speculative2.ll    |  91 +++++
 8 files changed, 681 insertions(+)
 create mode 100644 llvm/lib/Target/BPF/BPFAdjustOpt.cpp
 create mode 100644 llvm/test/CodeGen/BPF/adjust-opt-icmp1.ll
 create mode 100644 llvm/test/CodeGen/BPF/adjust-opt-icmp2.ll
 create mode 100644 llvm/test/CodeGen/BPF/adjust-opt-speculative1.ll
 create mode 100644 llvm/test/CodeGen/BPF/adjust-opt-speculative2.ll

diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 966a3b0cf26e6..82ac091fa7fa3 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -16,6 +16,7 @@
 namespace llvm {
 class BPFTargetMachine;

+ModulePass *createBPFAdjustOpt();
 ModulePass *createBPFCheckAndAdjustIR();

 FunctionPass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
@@ -27,6 +28,7 @@ FunctionPass *createBPFMIPeepholeTruncElimPass();
 FunctionPass *createBPFMIPreEmitPeepholePass();
 FunctionPass *createBPFMIPreEmitCheckingPass();

+void initializeBPFAdjustOptPass(PassRegistry&);
 void initializeBPFCheckAndAdjustIRPass(PassRegistry&);

 void initializeBPFAbstractMemberAccessLegacyPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
new file mode 100644
index 0000000000000..8efaa9d72b576
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
@@ -0,0 +1,310 @@
+//===---------------- BPFAdjustOpt.cpp - Adjust Optimization --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Adjust optimization to make the code more kernel verifier friendly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "BPFTargetMachine.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "bpf-adjust-opt"
+
+using namespace llvm;
+
+static cl::opt<bool>
+    DisableBPFserializeICMP("bpf-disable-serialize-icmp", cl::Hidden,
+                            cl::desc("BPF: Disable Serializing ICMP insns."),
+                            cl::init(false));
+
+static cl::opt<bool> DisableBPFavoidSpeculation(
+    "bpf-disable-avoid-speculation", cl::Hidden,
+    cl::desc("BPF: Disable Avoiding Speculative Code Motion."),
+    cl::init(false));
+
+namespace {
+
+class BPFAdjustOpt final : public ModulePass {
+  struct PassThroughInfo {
+    Instruction *Input;
+    Instruction *UsedInst;
+    uint32_t OpIdx;
+    PassThroughInfo(Instruction *I, Instruction *U, uint32_t Idx)
+        : Input(I), UsedInst(U), OpIdx(Idx) {}
+  };
+
+public:
+  static char ID;
+  Module *Mod;
+
+  BPFAdjustOpt() : ModulePass(ID) {}
+  bool runOnModule(Module &M) override;
+
+private:
+  SmallVector<PassThroughInfo, 16> PassThroughs;
+
+  void adjustBasicBlock(BasicBlock &BB);
+  bool serializeICMPCrossBB(BasicBlock &BB);
+  void adjustInst(Instruction &I);
+  bool serializeICMPInBB(Instruction &I);
+  bool avoidSpeculation(Instruction &I);
+  bool insertPassThrough();
+};
+
+} // End anonymous namespace
+
+char BPFAdjustOpt::ID = 0;
+INITIALIZE_PASS(BPFAdjustOpt, "bpf-adjust-opt", "BPF Adjust Optimization",
+                false, false)
+
+ModulePass *llvm::createBPFAdjustOpt() { return new BPFAdjustOpt(); }
+
+bool BPFAdjustOpt::runOnModule(Module &M) {
+  Mod = &M;
+  for (Function &F : M)
+    for (auto &BB : F) {
+      adjustBasicBlock(BB);
+      for (auto &I : BB)
+        adjustInst(I);
+    }
+
+  return insertPassThrough();
+}
+
+bool BPFAdjustOpt::insertPassThrough() {
+  for (auto &Info : PassThroughs) {
+    auto *CI = BPFCoreSharedInfo::insertPassThrough(
+        Mod, Info.UsedInst->getParent(), Info.Input, Info.UsedInst);
+    Info.UsedInst->setOperand(Info.OpIdx, CI);
+  }
+
+  return !PassThroughs.empty();
+}
+
+// Prevent the instcombine pass from combining conditionals
+// in the same basic block.
+bool BPFAdjustOpt::serializeICMPInBB(Instruction &I) {
+  // For:
+  //   comp1 = icmp ...;
+  //   comp2 = icmp ...;
+  //   ... or comp1 comp2 ...
+  // changed to:
+  //   comp1 = icmp ...;
+  //   comp2 = icmp ...;
+  //   new_comp1 = __builtin_bpf_passthrough(seq_num, comp1)
+  //   ... or new_comp1 comp2 ...
+  if (I.getOpcode() != Instruction::Or)
+    return false;
+  auto *Icmp1 = dyn_cast<ICmpInst>(I.getOperand(0));
+  if (!Icmp1)
+    return false;
+  auto *Icmp2 = dyn_cast<ICmpInst>(I.getOperand(1));
+  if (!Icmp2)
+    return false;
+
+  Value *Icmp1Op0 = Icmp1->getOperand(0);
+  Value *Icmp2Op0 = Icmp2->getOperand(0);
+  if (Icmp1Op0 != Icmp2Op0)
+    return false;
+
+  // Now we have two icmp instructions feeding the same "or" instruction.
+  PassThroughInfo Info(Icmp1, &I, 0);
+  PassThroughs.push_back(Info);
+  return true;
+}
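+
+// Concretely (illustrative sketch, not taken from real verifier output):
+// for C source like `if (ret <= 0 || ret > 7)`, instcombine would normally
+// fold the two compares into a single unsigned range check along the lines of
+//   %off = add i32 %ret, -1
+//   %cmp = icmp ugt i32 %off, 6
+// which the kernel verifier may fail to track. With the barrier in place,
+// both signed compares survive into the final byte code, as the
+// adjust-opt-icmp*.ll tests below check via their CHECK/CHECK-DISABLE lines.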
+
+// Prevent the instcombine pass from combining conditionals
+// across basic blocks.
+bool BPFAdjustOpt::serializeICMPCrossBB(BasicBlock &BB) {
+  // For:
+  // B1:
+  //   comp1 = icmp ...;
+  //   if (comp1) goto B2 else B3;
+  // B2:
+  //   comp2 = icmp ...;
+  //   if (comp2) goto B4 else B5;
+  // B4:
+  //   ...
+  // changed to:
+  // B1:
+  //   comp1 = icmp ...;
+  //   comp1 = __builtin_bpf_passthrough(seq_num, comp1);
+  //   if (comp1) goto B2 else B3;
+  // B2:
+  //   comp2 = icmp ...;
+  //   if (comp2) goto B4 else B5;
+  // B4:
+  //   ...
+
+  // Check the basic block predecessors: if two of them (say B1 and B2) use
+  // icmp instructions to generate conditions and one is the predecessor of
+  // the other (e.g., B1 is the predecessor of B2), add a passthrough
+  // barrier after the icmp instruction of block B1.
+  BasicBlock *B2 = BB.getSinglePredecessor();
+  if (!B2)
+    return false;
+
+  BasicBlock *B1 = B2->getSinglePredecessor();
+  if (!B1)
+    return false;
+
+  Instruction *TI = B2->getTerminator();
+  auto *BI = dyn_cast<BranchInst>(TI);
+  if (!BI || !BI->isConditional())
+    return false;
+  auto *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!Cond || B2->getFirstNonPHI() != Cond)
+    return false;
+  Value *B2Op0 = Cond->getOperand(0);
+  auto Cond2Op = Cond->getPredicate();
+
+  TI = B1->getTerminator();
+  BI = dyn_cast<BranchInst>(TI);
+  if (!BI || !BI->isConditional())
+    return false;
+  Cond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!Cond)
+    return false;
+  Value *B1Op0 = Cond->getOperand(0);
+  auto Cond1Op = Cond->getPredicate();
+
+  if (B1Op0 != B2Op0)
+    return false;
+
+  if (Cond1Op == ICmpInst::ICMP_SGT || Cond1Op == ICmpInst::ICMP_SGE) {
+    if (Cond2Op != ICmpInst::ICMP_SLT && Cond2Op != ICmpInst::ICMP_SLE)
+      return false;
+  } else if (Cond1Op == ICmpInst::ICMP_SLT || Cond1Op == ICmpInst::ICMP_SLE) {
+    if (Cond2Op != ICmpInst::ICMP_SGT && Cond2Op != ICmpInst::ICMP_SGE)
+      return false;
+  } else {
+    return false;
+  }
+
+  PassThroughInfo Info(Cond, BI, 0);
+  PassThroughs.push_back(Info);
+
+  return true;
+}
+
+// Avoid speculative hoisting of certain computations out of
+// a basic block.
+bool BPFAdjustOpt::avoidSpeculation(Instruction &I) {
+  if (auto *LdInst = dyn_cast<LoadInst>(&I)) {
+    if (auto *GV = dyn_cast<GlobalVariable>(LdInst->getOperand(0))) {
+      if (GV->hasAttribute(BPFCoreSharedInfo::AmaAttr) ||
+          GV->hasAttribute(BPFCoreSharedInfo::TypeIdAttr))
+        return false;
+    }
+  }
+
+  if (!isa<LoadInst>(&I) && !isa<CallInst>(&I))
+    return false;
+
+  // For:
+  // B1:
+  //   var = ...
+  //   ...
+  //   /* icmp may not be in the same block as var = ... */
+  //   comp1 = icmp var, <const>;
+  //   if (comp1) goto B2 else B3;
+  // B2:
+  //   ... var ...
+  // change to:
+  // B1:
+  //   var = ...
+  //   ...
+  //   /* icmp may not be in the same block as var = ... */
+  //   comp1 = icmp var, <const>;
+  //   if (comp1) goto B2 else B3;
+  // B2:
+  //   var = __builtin_bpf_passthrough(seq_num, var);
+  //   ... var ...
+  bool isCandidate = false;
+  SmallVector<PassThroughInfo, 4> Candidates;
+  for (User *U : I.users()) {
+    Instruction *Inst = dyn_cast<Instruction>(U);
+    if (!Inst)
+      continue;
+
+    // May cover a little bit more than the above pattern.
+    if (auto *Icmp1 = dyn_cast<ICmpInst>(Inst)) {
+      Value *Icmp1Op1 = Icmp1->getOperand(1);
+      if (!isa<Constant>(Icmp1Op1))
+        return false;
+      isCandidate = true;
+      continue;
+    }
+
+    // Ignore a use in the same basic block as the definition.
+    if (Inst->getParent() == I.getParent())
+      continue;
+
+    // The use is in a different basic block. If there is a call or a
+    // load/store instruction before this use in its basic block, the
+    // computation most likely cannot be hoisted out, so skip it.
+    for (auto &I2 : *Inst->getParent()) {
+      if (isa<CallInst>(&I2))
+        return false;
+      if (isa<LoadInst>(&I2) || isa<StoreInst>(&I2))
+        return false;
+      if (&I2 == Inst)
+        break;
+    }
+
+    // The use should be in a GEP, or in simple arithmetic like
+    // zext/sext whose result feeds a GEP.
+    if (Inst->getOpcode() == Instruction::ZExt ||
+        Inst->getOpcode() == Instruction::SExt) {
+      PassThroughInfo Info(&I, Inst, 0);
+      Candidates.push_back(Info);
+    } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
+      // Traverse the GEP operands to find the index of the use.
+      unsigned i, e;
+      for (i = 1, e = GI->getNumOperands(); i != e; ++i) {
+        Value *V = GI->getOperand(i);
+        if (V == &I)
+          break;
+      }
+      if (i == e)
+        continue;
+
+      PassThroughInfo Info(&I, GI, i);
+      Candidates.push_back(Info);
+    }
+  }
+
+  if (!isCandidate || Candidates.empty())
+    return false;
+
+  PassThroughs.insert(PassThroughs.end(), Candidates.begin(), Candidates.end());
+  return true;
+}
+
+void BPFAdjustOpt::adjustBasicBlock(BasicBlock &BB) {
+  if (!DisableBPFserializeICMP && serializeICMPCrossBB(BB))
+    return;
+}
+
+void BPFAdjustOpt::adjustInst(Instruction &I) {
+  if (!DisableBPFserializeICMP && serializeICMPInBB(I))
+    return;
+  if (!DisableBPFavoidSpeculation && avoidSpeculation(I))
+    return;
+}
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index e5fc5bac97a8d..7ef35105083fb 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -42,6 +42,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
   PassRegistry &PR = *PassRegistry::getPassRegistry();
   initializeBPFAbstractMemberAccessLegacyPassPass(PR);
   initializeBPFPreserveDITypePass(PR);
+  initializeBPFAdjustOptPass(PR);
   initializeBPFCheckAndAdjustIRPass(PR);
   initializeBPFMIPeepholePass(PR);
   initializeBPFMIPeepholeTruncElimPass(PR);
@@ -115,6 +116,11 @@ void BPFTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
         PM.add(createCFGSimplificationPass(
             SimplifyCFGOptions().hoistCommonInsts(true)));
       });
+  Builder.addExtension(
+      PassManagerBuilder::EP_ModuleOptimizerEarly,
+      [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+        PM.add(createBPFAdjustOpt());
+      });
 }

 void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
index 27dd685de6bb8..a17d7173816a8 100644
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -14,6 +14,7 @@ add_public_tablegen_target(BPFCommonTableGen)

 add_llvm_target(BPFCodeGen
   BPFAbstractMemberAccess.cpp
+  BPFAdjustOpt.cpp
   BPFAsmPrinter.cpp
   BPFCheckAndAdjustIR.cpp
   BPFFrameLowering.cpp
diff --git a/llvm/test/CodeGen/BPF/adjust-opt-icmp1.ll b/llvm/test/CodeGen/BPF/adjust-opt-icmp1.ll
new file mode 100644
index 0000000000000..bb651f4ea57ad
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/adjust-opt-icmp1.ll
@@ -0,0 +1,90 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux %s | llvm-dis > %t1
+; RUN: llc %t1 -o - | FileCheck -check-prefixes=CHECK %s
+; RUN: opt -O2 -mtriple=bpf-pc-linux -bpf-disable-serialize-icmp %s | llvm-dis > %t1
+; RUN: llc %t1 -o - | FileCheck -check-prefixes=CHECK-DISABLE %s
+;
+; Source:
+;   int foo();
+;   int bar(int);
+;   int test() {
+;     int ret = foo();
+;     if (ret <= 0 || ret > 7)
+;       return 0;
+;     return bar(ret);
+;   }
+; Compilation flag:
+;   clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes test.c

+; Function Attrs: nounwind
+define dso_local i32 @test() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %cleanup.dest.slot = alloca i32, align 4
+  %0 = bitcast i32* %ret to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #3
+  %call = call i32 bitcast (i32 (...)* @foo to i32 ()*)()
+  store i32 %call, i32* %ret, align 4, !tbaa !2
+  %1 = load i32, i32* %ret,
align 4, !tbaa !2 + %cmp = icmp sle i32 %1, 0 + br i1 %cmp, label %if.then, label %lor.lhs.false + +; CHECK: [[REG1:r[0-9]+]] <<= 32 +; CHECK: [[REG1]] s>>= 32 +; CHECK: [[REG2:r[0-9]+]] = 1 +; CHECK: if [[REG2]] s> [[REG1]] goto +; CHECK: if [[REG1]] s> 7 goto + +; CHECK-DISABLE: [[REG1:r[0-9]+]] += -1 +; CHECK-DISABLE: [[REG1]] <<= 32 +; CHECK-DISABLE: [[REG1]] >>= 32 +; CHECK-DISABLE: if [[REG1]] > 6 goto + +lor.lhs.false: ; preds = %entry + %2 = load i32, i32* %ret, align 4, !tbaa !2 + %cmp1 = icmp sgt i32 %2, 7 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %lor.lhs.false, %entry + store i32 0, i32* %retval, align 4 + store i32 1, i32* %cleanup.dest.slot, align 4 + br label %cleanup + +if.end: ; preds = %lor.lhs.false + %3 = load i32, i32* %ret, align 4, !tbaa !2 + %call2 = call i32 @bar(i32 %3) + store i32 %call2, i32* %retval, align 4 + store i32 1, i32* %cleanup.dest.slot, align 4 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %4 = bitcast i32* %ret to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %4) #3 + %5 = load i32, i32* %retval, align 4 + ret i32 %5 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +declare dso_local i32 @foo(...) #2 + +declare dso_local i32 @bar(i32) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git ca9c5433a6c31e372092fcd8bfd0e4fddd7e8784)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/BPF/adjust-opt-icmp2.ll b/llvm/test/CodeGen/BPF/adjust-opt-icmp2.ll new file mode 100644 index 0000000000000..a264422d2b762 --- /dev/null +++ b/llvm/test/CodeGen/BPF/adjust-opt-icmp2.ll @@ -0,0 +1,97 @@ +; RUN: opt -O2 -mtriple=bpf-pc-linux %s | llvm-dis > %t1 +; RUN: llc %t1 -o - | FileCheck -check-prefixes=CHECK %s +; RUN: opt -O2 -mtriple=bpf-pc-linux -bpf-disable-serialize-icmp %s | llvm-dis > %t1 +; RUN: llc %t1 -o - | FileCheck -check-prefixes=CHECK-DISABLE %s +; +; Source: +; int foo(); +; int bar(int); +; int test() { +; int ret = foo(); +; if (ret <= 0) +; return 0; +; if (ret > 7) +; return 0; +; return bar(ret); +; } +; Compilation flag: +; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes test.c + +; Function Attrs: nounwind +define dso_local i32 @test() #0 { +entry: + %retval = alloca i32, align 4 + %ret = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + %0 = bitcast i32* %ret to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #3 + %call = call i32 
bitcast (i32 (...)* @foo to i32 ()*)() + store i32 %call, i32* %ret, align 4, !tbaa !2 + %1 = load i32, i32* %ret, align 4, !tbaa !2 + %cmp = icmp sle i32 %1, 0 + br i1 %cmp, label %if.then, label %if.end + +; CHECK: [[REG1:r[0-9]+]] <<= 32 +; CHECK: [[REG1]] s>>= 32 +; CHECK: [[REG2:r[0-9]+]] = 1 +; CHECK: if [[REG2]] s> [[REG1]] goto +; CHECK: if [[REG1]] s> 7 goto + +; CHECK-DISABLE: [[REG1:r[0-9]+]] += -1 +; CHECK-DISABLE: [[REG1]] <<= 32 +; CHECK-DISABLE: [[REG1]] >>= 32 +; CHECK-DISABLE: if [[REG1]] > 6 goto + +if.then: ; preds = %entry + store i32 0, i32* %retval, align 4 + store i32 1, i32* %cleanup.dest.slot, align 4 + br label %cleanup + +if.end: ; preds = %entry + %2 = load i32, i32* %ret, align 4, !tbaa !2 + %cmp1 = icmp sgt i32 %2, 7 + br i1 %cmp1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + store i32 0, i32* %retval, align 4 + store i32 1, i32* %cleanup.dest.slot, align 4 + br label %cleanup + +if.end3: ; preds = %if.end + %3 = load i32, i32* %ret, align 4, !tbaa !2 + %call4 = call i32 @bar(i32 %3) + store i32 %call4, i32* %retval, align 4 + store i32 1, i32* %cleanup.dest.slot, align 4 + br label %cleanup + +cleanup: ; preds = %if.end3, %if.then2, %if.then + %4 = bitcast i32* %ret to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %4) #3 + %5 = load i32, i32* %retval, align 4 + ret i32 %5 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +declare dso_local i32 @foo(...) #2 + +declare dso_local i32 @bar(i32) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git ca9c5433a6c31e372092fcd8bfd0e4fddd7e8784)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/BPF/adjust-opt-speculative1.ll b/llvm/test/CodeGen/BPF/adjust-opt-speculative1.ll new file mode 100644 index 0000000000000..0d29fff08eed7 --- /dev/null +++ b/llvm/test/CodeGen/BPF/adjust-opt-speculative1.ll @@ -0,0 +1,84 @@ +; RUN: opt -O2 -mtriple=bpf-pc-linux %s | llvm-dis > %t1 +; RUN: llc %t1 -o - | FileCheck -check-prefixes=CHECK-COMMON,CHECK %s +; RUN: opt -O2 -mtriple=bpf-pc-linux -bpf-disable-avoid-speculation %s | llvm-dis > %t1 +; RUN: llc %t1 -o - | FileCheck -check-prefixes=CHECK-COMMON,CHECK-DISABLE %s +; +; Source: +; unsigned long foo(); +; void *test(void *p) { +; unsigned long ret = foo(); +; if (ret <= 7) +; p += ret; +; return p; +; } +; Compilation flag: +; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes test.c + +; Function Attrs: 
nounwind +define dso_local i8* @test(i8* %p) #0 { +entry: + %p.addr = alloca i8*, align 8 + %ret = alloca i64, align 8 + store i8* %p, i8** %p.addr, align 8, !tbaa !2 + %0 = bitcast i64* %ret to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %0) #3 + %call = call i64 bitcast (i64 (...)* @foo to i64 ()*)() + store i64 %call, i64* %ret, align 8, !tbaa !6 + %1 = load i64, i64* %ret, align 8, !tbaa !6 + %cmp = icmp ule i64 %1, 7 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %2 = load i64, i64* %ret, align 8, !tbaa !6 + %3 = load i8*, i8** %p.addr, align 8, !tbaa !2 + %add.ptr = getelementptr i8, i8* %3, i64 %2 + store i8* %add.ptr, i8** %p.addr, align 8, !tbaa !2 + br label %if.end + +if.end: ; preds = %if.then, %entry + %4 = load i8*, i8** %p.addr, align 8, !tbaa !2 + %5 = bitcast i64* %ret to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %5) #3 + ret i8* %4 +} +; CHECK-COMMON: [[REG6:r[0-9]+]] = r1 +; CHECK-COMMON: call foo + +; CHECK: if r0 > 7 goto [[LABEL:.*]] +; CHECK: [[REG6]] += r0 +; CHECK: [[LABEL]]: +; CHECK: r0 = [[REG6]] + +; CHECK-DISABLE: r0 = [[REG6]] +; CHECK-DISABLE: r0 += [[REG1:r[0-9]+]] +; CHECK-DISABLE: [[REG2:r[0-9]+]] = 8 +; CHECK-DISABLE: if [[REG2]] > [[REG1]] goto [[LABEL:.*]] +; CHECK-DISABLE: r0 = [[REG6]] +; CHECK-DISABLE: [[LABEL]]: + +; CHECK-COMMON: exit + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +declare dso_local i64 @foo(...) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git ca9c5433a6c31e372092fcd8bfd0e4fddd7e8784)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"any pointer", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !7, i64 0} +!7 = !{!"long", !4, i64 0} diff --git a/llvm/test/CodeGen/BPF/adjust-opt-speculative2.ll b/llvm/test/CodeGen/BPF/adjust-opt-speculative2.ll new file mode 100644 index 0000000000000..3b5cbea9f84b6 --- /dev/null +++ b/llvm/test/CodeGen/BPF/adjust-opt-speculative2.ll @@ -0,0 +1,91 @@ +; RUN: opt -O2 -mtriple=bpf-pc-linux %s | llvm-dis > %t1 +; RUN: llc %t1 -o - | FileCheck -check-prefixes=CHECK-COMMON,CHECK %s +; RUN: opt -O2 -mtriple=bpf-pc-linux -bpf-disable-avoid-speculation %s | llvm-dis > %t1 +; RUN: llc %t1 -o - | FileCheck -check-prefixes=CHECK-COMMON,CHECK-DISABLE %s +; +; Source: +; unsigned foo(); +; void *test(void *p) { +; unsigned ret = foo(); +; if (ret <= 7) +; p += ret; +; return p; +; } +; Compilation flag: +; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes test.c + +; Function Attrs: 
nounwind +define dso_local i8* @test(i8* %p) #0 { +entry: + %p.addr = alloca i8*, align 8 + %ret = alloca i32, align 4 + store i8* %p, i8** %p.addr, align 8, !tbaa !2 + %0 = bitcast i32* %ret to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #3 + %call = call i32 bitcast (i32 (...)* @foo to i32 ()*)() + store i32 %call, i32* %ret, align 4, !tbaa !6 + %1 = load i32, i32* %ret, align 4, !tbaa !6 + %cmp = icmp ule i32 %1, 7 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %2 = load i32, i32* %ret, align 4, !tbaa !6 + %3 = load i8*, i8** %p.addr, align 8, !tbaa !2 + %idx.ext = zext i32 %2 to i64 + %add.ptr = getelementptr i8, i8* %3, i64 %idx.ext + store i8* %add.ptr, i8** %p.addr, align 8, !tbaa !2 + br label %if.end + +if.end: ; preds = %if.then, %entry + %4 = load i8*, i8** %p.addr, align 8, !tbaa !2 + %5 = bitcast i32* %ret to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %5) #3 + ret i8* %4 +} + +; CHECK-COMMON: [[REG6:r[0-9]+]] = r1 +; CHECK-COMMON: call foo + +; CHECK: r0 <<= 32 +; CHECK: r0 >>= 32 +; CHECK: if r0 > 7 goto [[LABEL:.*]] +; CHECK: [[REG6]] += r0 +; CHECK: [[LABEL]]: +; CHECK: r0 = [[REG6]] + +; CHECK-DISABLE: [[REG1:r[0-9]+]] = r0 +; CHECK-DISABLE: [[REG1]] <<= 32 +; CHECK-DISABLE: [[REG1]] >>= 32 +; CHECK-DISABLE: r0 = [[REG6]] +; CHECK-DISABLE: r0 += [[REG1]] +; CHECK-DISABLE: [[REG2:r[0-9]+]] = 8 +; CHECK-DISABLE: if [[REG2]] > [[REG1]] goto [[LABEL:.*]] +; CHECK-DISABLE: r0 = [[REG6]] +; CHECK-DISABLE: [[LABEL]]: + +; CHECK-COMMON: exit + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +declare dso_local i32 @foo(...) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git ca9c5433a6c31e372092fcd8bfd0e4fddd7e8784)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"any pointer", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !7, i64 0} +!7 = !{!"int", !4, i64 0} From d6af25e07c35a11bf6231151fe23a17337e8488f Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 7 Oct 2020 15:50:43 +0000 Subject: [PATCH 237/321] [gn build] Port ddf1864ace4 --- llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn index d3a9b728b48c7..e1fcc140c6efd 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn @@ -60,6 +60,7 @@ static_library("LLVMBPFCodeGen") { include_dirs = 
[ "." ]
   sources = [
     "BPFAbstractMemberAccess.cpp",
+    "BPFAdjustOpt.cpp",
     "BPFAsmPrinter.cpp",
     "BPFCheckAndAdjustIR.cpp",
     "BPFFrameLowering.cpp",

From 14d5ee63e3c3c625d5432db8c7fd6a2bdd7e6809 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Wed, 7 Oct 2020 08:56:13 -0700
Subject: [PATCH 238/321] [Tests] Precommit test showing gap around load
 forwarding of vectors in instcombine

---
 llvm/test/Transforms/InstCombine/load.ll | 70 +++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/InstCombine/load.ll b/llvm/test/Transforms/InstCombine/load.ll
index 032da41de6a5c..79adbe1173332 100644
--- a/llvm/test/Transforms/InstCombine/load.ll
+++ b/llvm/test/Transforms/InstCombine/load.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s

-target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1"

 @X = constant i32 42	; [#uses=2]
 @X2 = constant i32 47	; [#uses=1]

@@ -344,3 +344,71 @@ define void @test20(<vscale x 4 x i8*>* %x, <vscale x 4 x i8*>* %y) {
   store <vscale x 4 x i8*> %x.load, <vscale x 4 x i8*>* %y, align 1
   ret void
 }
+
+
+; Check that non-integral pointers are not converted using inttoptr
+
+declare void @use(i8*)
+declare void @use.p1(i8 addrspace(1)*)
+
+define i64 @test21(i64* %P) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT:    [[X:%.*]] = load i64, i64* [[P:%.*]], align 8
+; CHECK-NEXT:    [[Y_CAST:%.*]] = inttoptr i64 [[X]] to i8*
+; CHECK-NEXT:    call void @use(i8* [[Y_CAST]])
+; CHECK-NEXT:    ret i64 [[X]]
+;
+  %P.ptr = bitcast i64* %P to i8**
+  %X = load i64, i64* %P
+  %Y = load i8*, i8** %P.ptr
+  call void @use(i8* %Y)
+  ret i64 %X
+}
+
+define i64 @test22(i64* %P) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT:    [[P_PTR:%.*]] = bitcast i64* [[P:%.*]] to i8 addrspace(1)**
+; CHECK-NEXT:    [[X:%.*]] = load i64, i64* [[P]], align 8
+; CHECK-NEXT:    [[Y:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[P_PTR]], align 8
+; CHECK-NEXT:    call void @use.p1(i8 addrspace(1)* [[Y]])
+; CHECK-NEXT:    ret i64 [[X]]
+;
+  %P.ptr = bitcast i64* %P to i8 addrspace(1)**
+  %X = load i64, i64* %P
+  %Y = load i8 addrspace(1)*, i8 addrspace(1)** %P.ptr
+  call void @use.p1(i8 addrspace(1)* %Y)
+  ret i64 %X
+}
+
+declare void @use.v2.p0(<2 x i8*>)
+declare void @use.v2.p1(<2 x i8 addrspace(1)*>)
+
+define <2 x i64> @test23(<2 x i64>* %P) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT:    [[P_PTR:%.*]] = bitcast <2 x i64>* [[P:%.*]] to <2 x i8*>*
+; CHECK-NEXT:    [[X:%.*]] = load <2 x i64>, <2 x i64>* [[P]], align 16
+; CHECK-NEXT:    [[Y:%.*]] = load <2 x i8*>, <2 x i8*>* [[P_PTR]], align 16
+; CHECK-NEXT:    call void @use.v2.p0(<2 x i8*> [[Y]])
+; CHECK-NEXT:    ret <2 x i64> [[X]]
+;
+  %P.ptr = bitcast <2 x i64>* %P to <2 x i8*>*
+  %X = load <2 x i64>, <2 x i64>* %P
+  %Y = load <2 x i8*>, <2 x i8*>* %P.ptr
+  call void @use.v2.p0(<2 x i8*> %Y)
+  ret <2 x i64> %X
+}
+
+define <2 x i64> @test24(<2 x i64>* %P) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT:    [[P_PTR:%.*]] = bitcast <2 x i64>* [[P:%.*]] to <2 x i8 addrspace(1)*>*
+; CHECK-NEXT:    [[X:%.*]] = load <2 x i64>, <2 x i64>* [[P]], align 16
+; CHECK-NEXT:    [[Y:%.*]] = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*>* [[P_PTR]], align 16
+; CHECK-NEXT:    call void @use.v2.p1(<2 x i8 addrspace(1)*> [[Y]])
+; CHECK-NEXT:    ret <2 x i64> [[X]]
+;
+  %P.ptr = bitcast <2 x i64>* %P to <2 x i8 addrspace(1)*>*
+  %X = load <2 x i64>, <2 x i64>* %P
+  %Y = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*>* %P.ptr
+  call void @use.v2.p1(<2 x i8 addrspace(1)*> %Y)
+  ret <2 x i64> %X
+} From bef27e50b9a2dd272a0c48e5237daeab50f77c7a Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 7 Oct 2020 18:59:40 +0300 Subject: [PATCH 239/321] [NFC][InstCombine] Autogenerate a few tests being affected by upcoming patch --- llvm/test/Transforms/InstCombine/PR30597.ll | 16 +- llvm/test/Transforms/InstCombine/intptr1.ll | 152 +++++++++++++----- .../Transforms/InstCombine/load-bitcast32.ll | 61 ++++--- .../Transforms/InstCombine/memset_chk-1.ll | 12 +- 4 files changed, 166 insertions(+), 75 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/PR30597.ll b/llvm/test/Transforms/InstCombine/PR30597.ll index c0803ed71204d..ceca3961bbed2 100644 --- a/llvm/test/Transforms/InstCombine/PR30597.ll +++ b/llvm/test/Transforms/InstCombine/PR30597.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -5,6 +6,10 @@ target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: readonly uwtable define i1 @dot_ref_s(i32** noalias nocapture readonly dereferenceable(8)) { +; CHECK-LABEL: @dot_ref_s( +; CHECK-NEXT: entry-block: +; CHECK-NEXT: ret i1 false +; entry-block: %loadedptr = load i32*, i32** %0, align 8, !nonnull !0 %ptrtoint = ptrtoint i32* %loadedptr to i64 @@ -12,19 +17,20 @@ entry-block: %switchtmp = icmp eq i32* %inttoptr, null ret i1 %switchtmp -; CHECK-LABEL: @dot_ref_s -; CHECK-NEXT: entry-block: -; CHECK-NEXT: ret i1 false } ; Function Attrs: readonly uwtable define i64* @function(i64* noalias nocapture readonly dereferenceable(8)) { +; CHECK-LABEL: @function( +; CHECK-NEXT: entry-block: +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0:%.*]] to i64** +; CHECK-NEXT: [[LOADED1:%.*]] = load i64*, i64** [[TMP1]], align 8, !nonnull !0 +; CHECK-NEXT: ret i64* [[LOADED1]] +; entry-block: %loaded = load i64, i64* %0, align 8, !range !1 %inttoptr = inttoptr i64 %loaded to i64* ret i64* %inttoptr -; CHECK-LABEL: @function -; CHECK: %{{.+}} = load i64*, i64** %{{.+}}, align 8, !nonnull } diff --git a/llvm/test/Transforms/InstCombine/intptr1.ll b/llvm/test/Transforms/InstCombine/intptr1.ll index 1e00f5e06d513..3d25c9d5f5a78 100644 --- a/llvm/test/Transforms/InstCombine/intptr1.ll +++ b/llvm/test/Transforms/InstCombine/intptr1.ll @@ -1,36 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define void @test1(float* %a, float* readnone %a_end, i64* %b.i64) { -; CHECK-LABEL: @test1 +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult float* [[A:%.*]], [[A_END:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[B_I64:%.*]] to float** +; CHECK-NEXT: [[B1:%.*]] = load float*, float** [[TMP0]], align 8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[A_ADDR_03:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[A]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B1]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4 +; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[TMP1]], 4.200000e+01 +; CHECK-NEXT: store float [[MUL_I]], float* [[A_ADDR_03]], align 4 +; CHECK-NEXT: [[ADD]] = getelementptr inbounds float, float* [[B_ADDR_02_PTR]], i64 1 +; CHECK-NEXT: [[INCDEC_PTR]] = 
getelementptr inbounds float, float* [[A_ADDR_03]], i64 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult float* [[INCDEC_PTR]], [[A_END]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: %cmp1 = icmp ult float* %a, %a_end br i1 %cmp1, label %for.body.preheader, label %for.end for.body.preheader: ; preds = %entry %b = load i64, i64* %b.i64, align 8 -; CHECK: load float*, float** br label %for.body for.body: ; preds = %for.body, %for.body.preheader %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] -; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] -; CHECK: %b.addr.02.ptr = phi float* [ %add, %for.body ], -; CHECK-NOT: %b.addr.02 = phi i64 %tmp = inttoptr i64 %b.addr.02 to float* -; CHECK-NOT: inttoptr i64 %tmp1 = load float, float* %tmp, align 4 -; CHECK: = load %mul.i = fmul float %tmp1, 4.200000e+01 store float %mul.i, float* %a.addr.03, align 4 %add = getelementptr inbounds float, float* %tmp, i64 1 %add.int = ptrtoint float* %add to i64 -; CHECK: %add = getelementptr -; CHECK-NOT: ptrtoint float* %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 -; CHECK: %incdec.ptr = %cmp = icmp ult float* %incdec.ptr, %a_end br i1 %cmp, label %for.body, label %for.end @@ -39,7 +51,31 @@ for.end: ; preds = %for.body, %entry } define void @test1_neg(float* %a, float* readnone %a_end, i64* %b.i64) { -; CHECK-LABEL: @test1_neg +; CHECK-LABEL: @test1_neg( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult float* [[A:%.*]], [[A_END:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[B:%.*]] = load i64, i64* [[B_I64:%.*]], align 8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[A_ADDR_03:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[BB:%.*]] ], [ [[A]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[B_ADDR_02:%.*]] = phi i64 [ [[ADD_INT:%.*]], [[BB]] ], [ [[B]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP:%.*]] = inttoptr i64 [[B_ADDR_02]] to float* +; CHECK-NEXT: [[PTRCMP:%.*]] = icmp ult float* [[TMP]], [[A_END]] +; CHECK-NEXT: br i1 [[PTRCMP]], label [[FOR_END]], label [[BB]] +; CHECK: bb: +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[A]], align 4 +; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[TMP1]], 4.200000e+01 +; CHECK-NEXT: store float [[MUL_I]], float* [[A_ADDR_03]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = getelementptr inbounds float, float* [[A]], i64 1 +; CHECK-NEXT: [[ADD_INT]] = ptrtoint float* [[ADD]] to i64 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[A_ADDR_03]], i64 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult float* [[INCDEC_PTR]], [[A_END]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: %cmp1 = icmp ult float* %a, %a_end br i1 %cmp1, label %for.body.preheader, label %for.end @@ -52,11 +88,8 @@ for.body: ; preds = %for.body, %for.body %a.addr.03 = phi float* [ %incdec.ptr, %bb ], [ %a, %for.body.preheader ] %b.addr.02 = phi i64 [ %add.int, %bb ], [ %b, %for.body.preheader ] -; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %bb ], [ %a, %for.body.preheader ] -; CHECK: %b.addr.02 = phi i64 %tmp = inttoptr i64 %b.addr.02 to float* -; CHECK: inttoptr i64 %ptrcmp = icmp ult float* %tmp, %a_end br i1 %ptrcmp, label %for.end, label %bb @@ -66,7 +99,6 @@ bb: store 
float %mul.i, float* %a.addr.03, align 4 %add = getelementptr inbounds float, float* %a, i64 1 %add.int = ptrtoint float* %add to i64 -; CHECK: ptrtoint float* %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 %cmp = icmp ult float* %incdec.ptr, %a_end br i1 %cmp, label %for.body, label %for.end @@ -77,7 +109,26 @@ for.end: ; preds = %for.body, %entry define void @test2(float* %a, float* readnone %a_end, float** %b.float) { -; CHECK-LABEL: @test2 +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult float* [[A:%.*]], [[A_END:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[B1:%.*]] = load float*, float** [[B_FLOAT:%.*]], align 8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[A_ADDR_03:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[A]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B1]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4 +; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[TMP1]], 4.200000e+01 +; CHECK-NEXT: store float [[MUL_I]], float* [[A_ADDR_03]], align 4 +; CHECK-NEXT: [[ADD]] = getelementptr inbounds float, float* [[B_ADDR_02_PTR]], i64 1 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[A_ADDR_03]], i64 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult float* [[INCDEC_PTR]], [[A_END]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: %cmp1 = icmp ult float* %a, %a_end br i1 %cmp1, label %for.body.preheader, label %for.end @@ -85,29 +136,20 @@ entry: for.body.preheader: ; preds = %entry %b.i64 = bitcast float** %b.float to i64* %b = load i64, i64* %b.i64, align 8 -; CHECK: load float*, float** br label %for.body for.body: ; preds = %for.body, %for.body.preheader %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] -; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] -; CHECK: %b.addr.02.ptr = phi float* [ %add, %for.body ], -; CHECK-NOT: %b.addr.02 = phi i64 %tmp = inttoptr i64 %b.addr.02 to float* -; CHECK-NOT: inttoptr i64 %tmp1 = load float, float* %tmp, align 4 -; CHECK: = load %mul.i = fmul float %tmp1, 4.200000e+01 store float %mul.i, float* %a.addr.03, align 4 %add = getelementptr inbounds float, float* %tmp, i64 1 -; CHECK: %add = %add.int = ptrtoint float* %add to i64 -; CHECK-NOT: ptrtoint float* %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 -; CHECK: %incdec.ptr = %cmp = icmp ult float* %incdec.ptr, %a_end br i1 %cmp, label %for.body, label %for.end @@ -117,7 +159,27 @@ for.end: ; preds = %for.body, %entry define void @test3(float* %a, float* readnone %a_end, i8** %b.i8p) { -; CHECK-LABEL: @test3 +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult float* [[A:%.*]], [[A_END:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8** [[B_I8P:%.*]] to float** +; CHECK-NEXT: [[B1:%.*]] = load float*, float** [[TMP0]], align 8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[A_ADDR_03:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[A]], [[FOR_BODY_PREHEADER]] 
] +; CHECK-NEXT: [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B1]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4 +; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[TMP1]], 4.200000e+01 +; CHECK-NEXT: store float [[MUL_I]], float* [[A_ADDR_03]], align 4 +; CHECK-NEXT: [[ADD]] = getelementptr inbounds float, float* [[B_ADDR_02_PTR]], i64 1 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[A_ADDR_03]], i64 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult float* [[INCDEC_PTR]], [[A_END]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: %cmp1 = icmp ult float* %a, %a_end br i1 %cmp1, label %for.body.preheader, label %for.end @@ -125,29 +187,20 @@ entry: for.body.preheader: ; preds = %entry %b.i64 = bitcast i8** %b.i8p to i64* %b = load i64, i64* %b.i64, align 8 -; CHECK: load float*, float** br label %for.body for.body: ; preds = %for.body, %for.body.preheader %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] -; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] -; CHECK: %b.addr.02.ptr = phi float* [ %add, %for.body ], -; CHECK-NOT: %b.addr.02 = phi i64 %tmp = inttoptr i64 %b.addr.02 to float* -; CHECK-NOT: inttoptr i64 %tmp1 = load float, float* %tmp, align 4 -; CHECK: = load %mul.i = fmul float %tmp1, 4.200000e+01 store float %mul.i, float* %a.addr.03, align 4 %add = getelementptr inbounds float, float* %tmp, i64 1 -; CHECK: %add = getelementptr %add.int = ptrtoint float* %add to i64 -; CHECK-NOT: ptrtoint float* %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 -; CHECK: %incdec.ptr = %cmp = icmp ult float* %incdec.ptr, %a_end br i1 %cmp, label %for.body, label %for.end @@ -157,34 +210,45 @@ for.end: ; preds = %for.body, %entry define void @test4(float* %a, float* readnone %a_end, float** %b.float) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult float* [[A:%.*]], [[A_END:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[B_F12:%.*]] = load float*, float** [[B_FLOAT:%.*]], align 8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[A_ADDR_03:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[A]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B_F12]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4 +; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[TMP1]], 4.200000e+01 +; CHECK-NEXT: store float [[MUL_I]], float* [[A_ADDR_03]], align 4 +; CHECK-NEXT: [[ADD]] = getelementptr inbounds float, float* [[B_ADDR_02_PTR]], i64 1 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[A_ADDR_03]], i64 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult float* [[INCDEC_PTR]], [[A_END]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: -; CHECK-LABEL: @test4 %cmp1 = icmp ult float* %a, %a_end br i1 %cmp1, label %for.body.preheader, label %for.end for.body.preheader: ; preds = %entry %b.f = load float*, float** %b.float, align 8 %b = ptrtoint float* %b.f to i64 -; CHECK: load float*, float** -; CHECK-NOT: ptrtoint float* br label %for.body -; CHECK: br 
label %for.body for.body: ; preds = %for.body, %for.body.preheader %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] %tmp = inttoptr i64 %b.addr.02 to float* -; CHECK-NOT: inttoptr i64 %tmp1 = load float, float* %tmp, align 4 -; CHECK: = load %mul.i = fmul float %tmp1, 4.200000e+01 store float %mul.i, float* %a.addr.03, align 4 %add = getelementptr inbounds float, float* %tmp, i64 1 -; CHECK: %add = %add.int = ptrtoint float* %add to i64 -; CHECK-NOT: ptrtoint float* %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 -; CHECK: %incdec.ptr = %cmp = icmp ult float* %incdec.ptr, %a_end br i1 %cmp, label %for.body, label %for.end diff --git a/llvm/test/Transforms/InstCombine/load-bitcast32.ll b/llvm/test/Transforms/InstCombine/load-bitcast32.ll index b1c78a8a314eb..c9de0b96f4af0 100644 --- a/llvm/test/Transforms/InstCombine/load-bitcast32.ll +++ b/llvm/test/Transforms/InstCombine/load-bitcast32.ll @@ -1,13 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -instcombine -S < %s | FileCheck %s target datalayout = "p:32:32:32" define i64* @test1(i8* %x) { -entry: ; CHECK-LABEL: @test1( -; CHECK: load i64, i64* -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = bitcast i8* [[X:%.*]] to i64* +; CHECK-NEXT: [[B:%.*]] = load i64, i64* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[B]] to i32 +; CHECK-NEXT: [[C:%.*]] = inttoptr i32 [[TMP0]] to i64* +; CHECK-NEXT: ret i64* [[C]] +; +entry: %a = bitcast i8* %x to i64* %b = load i64, i64* %a %c = inttoptr i64 %b to i64* @@ -16,10 +22,13 @@ entry: } define i32* @test2(i8* %x) { -entry: ; CHECK-LABEL: @test2( -; CHECK: load i32*, i32** -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32** +; CHECK-NEXT: [[B1:%.*]] = load i32*, i32** [[TMP0]], align 4 +; CHECK-NEXT: ret i32* [[B1]] +; +entry: %a = bitcast i8* %x to i32* %b = load i32, i32* %a %c = inttoptr i32 %b to i32* @@ -28,10 +37,13 @@ entry: } define i64* @test3(i8* %x) { -entry: ; CHECK-LABEL: @test3( -; CHECK: load i64*, i64** -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64** +; CHECK-NEXT: [[B1:%.*]] = load i64*, i64** [[TMP0]], align 4 +; CHECK-NEXT: ret i64* [[B1]] +; +entry: %a = bitcast i8* %x to i32* %b = load i32, i32* %a %c = inttoptr i32 %b to i64* @@ -40,11 +52,14 @@ entry: } define i64 @test4(i8* %x) { -entry: ; CHECK-LABEL: @test4( -; CHECK: load i32, i32* -; CHECK: zext -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* +; CHECK-NEXT: [[B1:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[C:%.*]] = zext i32 [[B1]] to i64 +; CHECK-NEXT: ret i64 [[C]] +; +entry: %a = bitcast i8* %x to i64** %b = load i64*, i64** %a %c = ptrtoint i64* %b to i64 @@ -53,10 +68,13 @@ entry: } define i32 @test5(i8* %x) { -entry: ; CHECK-LABEL: @test5( -; CHECK: load i32, i32* -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* +; CHECK-NEXT: [[B1:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: ret i32 [[B1]] +; +entry: %a = bitcast i8* %x to i32** %b = load i32*, i32** %a %c = ptrtoint i32* %b to i32 @@ -65,11 +83,14 @@ entry: } define i64 @test6(i8* %x) { -entry: ; CHECK-LABEL: @test6( -; CHECK: load i32, i32* -; CHECK: zext -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* +; CHECK-NEXT: 
[[B1:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[C:%.*]] = zext i32 [[B1]] to i64 +; CHECK-NEXT: ret i64 [[C]] +; +entry: %a = bitcast i8* %x to i32** %b = load i32*, i32** %a %c = ptrtoint i32* %b to i64 diff --git a/llvm/test/Transforms/InstCombine/memset_chk-1.ll b/llvm/test/Transforms/InstCombine/memset_chk-1.ll index ad45fb86299d4..6068c550c4c09 100644 --- a/llvm/test/Transforms/InstCombine/memset_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/memset_chk-1.ll @@ -78,13 +78,13 @@ define i32 @test_rauw(i8* %a, i8* %b, i8** %c) { ; CHECK-NEXT: [[YO107:%.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* [[B:%.*]], i1 false, i1 false, i1 false) ; CHECK-NEXT: [[CALL50:%.*]] = call i8* @__memmove_chk(i8* [[B]], i8* [[A]], i64 [[ADD180]], i64 [[YO107]]) ; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(i8* nonnull dereferenceable(1) [[B]]) -; CHECK-NEXT: [[STRCHR2:%.*]] = getelementptr i8, i8* [[B]], i64 [[STRLEN]] +; CHECK-NEXT: [[STRCHR1:%.*]] = getelementptr i8, i8* [[B]], i64 [[STRLEN]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8** [[C:%.*]] to i64* -; CHECK-NEXT: [[D1:%.*]] = load i64, i64* [[TMP0]], align 8 +; CHECK-NEXT: [[D2:%.*]] = load i64, i64* [[TMP0]], align 8 ; CHECK-NEXT: [[SUB183:%.*]] = ptrtoint i8* [[B]] to i64 -; CHECK-NEXT: [[SUB184:%.*]] = sub i64 [[D1]], [[SUB183]] +; CHECK-NEXT: [[SUB184:%.*]] = sub i64 [[D2]], [[SUB183]] ; CHECK-NEXT: [[ADD52_I_I:%.*]] = add nsw i64 [[SUB184]], 1 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[STRCHR2]], i8 0, i64 [[ADD52_I_I]], i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[STRCHR1]], i8 0, i64 [[ADD52_I_I]], i1 false) ; CHECK-NEXT: ret i32 4 ; entry: @@ -114,13 +114,13 @@ declare i8* @__memset_chk(i8*, i32, i64, i64) define float* @pr25892(i64 %size) #0 { ; CHECK-LABEL: @pr25892( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @malloc(i64 [[SIZE:%.*]]) #3 +; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @malloc(i64 [[SIZE:%.*]]) [[ATTR3:#.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null ; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float* ; CHECK-NEXT: [[CALL2:%.*]] = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull [[CALL]], i1 false, i1 false, i1 false) -; CHECK-NEXT: [[CALL3:%.*]] = tail call i8* @__memset_chk(i8* nonnull [[CALL]], i32 0, i64 [[SIZE]], i64 [[CALL2]]) #3 +; CHECK-NEXT: [[CALL3:%.*]] = tail call i8* @__memset_chk(i8* nonnull [[CALL]], i32 0, i64 [[SIZE]], i64 [[CALL2]]) [[ATTR3]] ; CHECK-NEXT: br label [[CLEANUP]] ; CHECK: cleanup: ; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ] From 7a01fc5abe4cb7b87ddd214b885426a3079636e9 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 6 Oct 2020 22:10:10 +0200 Subject: [PATCH 240/321] [MemCpyOpt] Add additional callslot test cases (NFC) For cases where the destination is captured. 
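As a compact sketch of the pattern being exercised (condensed from the tests
below, not an additional test): the memcpy destination escapes through an
earlier call, so the call slot transform has to reason about whether the call
that produces the source can also write the captured destination:

  %dest = alloca [16 x i8]
  %src = alloca [16 x i8]
  %dest.i8 = bitcast [16 x i8]* %dest to i8*
  %src.i8 = bitcast [16 x i8]* %src to i8*
  call void @accept_ptr(i8* %dest.i8)            ; %dest is captured here
  call void @accept_ptr(i8* %src.i8) argmemonly  ; may this reach %dest via the capture?
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false)

The three tests record the current behavior as the attributes on the second
call are strengthened step by step: argmemonly, then nounwind, then willreturn.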
--- llvm/test/Transforms/MemCpyOpt/callslot.ll | 64 ++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll index 90f1833a2d5cc..8aaf46dbb17ef 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -162,6 +162,70 @@ define void @dest_is_gep_requires_movement() { ret void } +define void @capture_before_call_argmemonly() { +; CHECK-LABEL: @capture_before_call_argmemonly( +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I8]]) +; CHECK-NEXT: [[DEST1:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST1]]) [[ATTR4:#.*]] +; CHECK-NEXT: ret void +; + %dest = alloca [16 x i8] + %src = alloca [16 x i8] + %dest.i8 = bitcast [16 x i8]* %dest to i8* + %src.i8 = bitcast [16 x i8]* %src to i8* + call void @accept_ptr(i8* %dest.i8) ; capture + call void @accept_ptr(i8* %src.i8) argmemonly + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false) + ret void +} + +define void @capture_before_call_argmemonly_nounwind() { +; CHECK-LABEL: @capture_before_call_argmemonly_nounwind( +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I8]]) +; CHECK-NEXT: [[DEST1:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST1]]) [[ATTR5:#.*]] +; CHECK-NEXT: ret void +; + %dest = alloca [16 x i8] + %src = alloca [16 x i8] + %dest.i8 = bitcast [16 x i8]* %dest to i8* + %src.i8 = bitcast [16 x i8]* %src to i8* + call void @accept_ptr(i8* %dest.i8) ; capture + ; NB: argmemonly currently implies willreturn. 
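+  ; Compared with the test above, the only change is the added nounwind on
+  ; this call.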
+ call void @accept_ptr(i8* %src.i8) argmemonly nounwind + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false) + ret void +} + +define void @capture_before_call_argmemonly_nounwind_willreturn() { +; CHECK-LABEL: @capture_before_call_argmemonly_nounwind_willreturn( +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I8]]) +; CHECK-NEXT: [[DEST1:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST1]]) [[ATTR0:#.*]] +; CHECK-NEXT: ret void +; + %dest = alloca [16 x i8] + %src = alloca [16 x i8] + %dest.i8 = bitcast [16 x i8]* %dest to i8* + %src.i8 = bitcast [16 x i8]* %src to i8* + call void @accept_ptr(i8* %dest.i8) ; capture + call void @accept_ptr(i8* %src.i8) argmemonly nounwind willreturn + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false) + ret void +} + declare void @may_throw() declare void @accept_ptr(i8*) declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) From 0a3c82e85b73e51e830b57844b2f5b98cb59afd1 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Wed, 7 Oct 2020 16:29:10 +0100 Subject: [PATCH 241/321] [clang-format][NFC] Store FormatToken::Type as an enum instead of bitfield This improves the debugging experience since LLDB will print the enumerator name instead of a decimal number. This changes TokenType to have uint8_t as the underlying type and moves it after the remaining bitfields to avoid increasing the size of FormatToken. Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D87006 --- clang/lib/Format/FormatToken.h | 27 +++++++++++------------- clang/lib/Format/UnwrappedLineParser.cpp | 2 +- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index c6af71a768a1a..9cc65bb11b54e 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -118,7 +118,7 @@ namespace format { /// Determines the semantic type of a syntactic token, e.g. whether "<" is a /// template opener or binary operator. -enum TokenType { +enum TokenType : uint8_t { #define TYPE(X) TT_##X, LIST_TOKEN_TYPES #undef TYPE @@ -211,8 +211,8 @@ struct FormatToken { ClosesTemplateDeclaration(false), StartsBinaryExpression(false), EndsBinaryExpression(false), PartOfMultiVariableDeclStmt(false), ContinuesLineCommentSection(false), Finalized(false), - BlockKind(BK_Unknown), Type(TT_Unknown), Decision(FD_Unformatted), - PackingKind(PPK_Inconclusive) {} + BlockKind(BK_Unknown), Decision(FD_Unformatted), + PackingKind(PPK_Inconclusive), Type(TT_Unknown) {} /// The \c Token. Token Tok; @@ -297,18 +297,6 @@ struct FormatToken { assert(getBlockKind() == BBK && "BraceBlockKind overflow!"); } -private: - unsigned Type : 8; - -public: - /// Returns the token's type, e.g. whether "<" is a template opener or - /// binary operator. - TokenType getType() const { return static_cast(Type); } - void setType(TokenType T) { - Type = T; - assert(getType() == T && "TokenType overflow!"); - } - private: /// Stores the formatting decision for the token once it was made. 
unsigned Decision : 2; @@ -335,6 +323,15 @@ struct FormatToken { assert(getPackingKind() == K && "ParameterPackingKind overflow!"); } +private: + TokenType Type; + +public: + /// Returns the token's type, e.g. whether "<" is a template opener or + /// binary operator. + TokenType getType() const { return Type; } + void setType(TokenType T) { Type = T; } + /// The number of newlines immediately before the \c Token. /// /// This can be used to determine what the user wrote in the original code diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index b599168b48e17..7075a6fe33f76 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2753,7 +2753,7 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, E = Line.Tokens.end(); I != E; ++I) { llvm::dbgs() << I->Tok->Tok.getName() << "[" - << "T=" << I->Tok->getType() + << "T=" << (unsigned)I->Tok->getType() << ", OC=" << I->Tok->OriginalColumn << "] "; } for (std::list::const_iterator I = Line.Tokens.begin(), From ff6e4441b93953efb2c52995e79e211a49ffac06 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Wed, 7 Oct 2020 16:29:22 +0100 Subject: [PATCH 242/321] [clang-format][tests] Fix MacroExpander lexer not parsing C++ keywords While debugging a different clang-format failure, I tried to reuse the MacroExpander lexer, but was surprised to see that it marks all C++ keywords (e.g. const, decltype) as being of type identifier. After stepping through the ::format() code, I noticed that the difference between these two is that the identifier table was not being initialized based on the FormatStyle, so only basic tokens such as tok::semi, tok::plus, etc. were being handled. Reviewed By: klimek Differential Revision: https://reviews.llvm.org/D88952 --- clang/unittests/Format/MacroExpanderTest.cpp | 16 ++++++++++++++++ clang/unittests/Format/TestLexer.h | 6 ++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/clang/unittests/Format/MacroExpanderTest.cpp b/clang/unittests/Format/MacroExpanderTest.cpp index 59c67f29bedde..20e1dba0d49a0 100644 --- a/clang/unittests/Format/MacroExpanderTest.cpp +++ b/clang/unittests/Format/MacroExpanderTest.cpp @@ -182,6 +182,22 @@ TEST_F(MacroExpanderTest, SingleExpansion) { EXPECT_ATTRIBUTES(Result, Attributes); } +TEST_F(MacroExpanderTest, UnderstandsCppTokens) { + auto Macros = create({"A(T,name)=T name = 0;"}); + auto *A = Lex.id("A"); + auto Args = lexArgs({"const int", "x"}); + auto Result = uneof(Macros->expand(A, Args)); + std::vector Attributes = { + {tok::kw_const, MR_ExpandedArg, 1, 0, {A}}, + {tok::kw_int, MR_ExpandedArg, 0, 0, {A}}, + {tok::identifier, MR_ExpandedArg, 0, 0, {A}}, + {tok::equal, MR_Hidden, 0, 0, {A}}, + {tok::numeric_constant, MR_Hidden, 0, 0, {A}}, + {tok::semi, MR_Hidden, 0, 1, {A}}, + }; + EXPECT_ATTRIBUTES(Result, Attributes); +} + } // namespace } // namespace format } // namespace clang diff --git a/clang/unittests/Format/TestLexer.h b/clang/unittests/Format/TestLexer.h index 8c5eb2b029fb3..2b56f10dd3793 100644 --- a/clang/unittests/Format/TestLexer.h +++ b/clang/unittests/Format/TestLexer.h @@ -55,7 +55,9 @@ inline std::string text(llvm::ArrayRef Tokens) { class TestLexer { public: - TestLexer() : SourceMgr("test.cpp", "") {} + TestLexer(FormatStyle Style = getLLVMStyle()) + : Style(Style), SourceMgr("test.cpp", ""), + IdentTable(getFormattingLangOpts(Style)) {} TokenList lex(llvm::StringRef Code) { Buffers.push_back( @@ -74,7 +76,7 @@ class TestLexer 
{ return Result[0]; } - FormatStyle Style = getLLVMStyle(); + FormatStyle Style; encoding::Encoding Encoding = encoding::Encoding_UTF8; std::vector> Buffers; clang::SourceManagerForFile SourceMgr; From b953a01b2cd04263c878292c609686647be396ad Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 7 Oct 2020 16:37:53 +0200 Subject: [PATCH 243/321] Reapply [ADT] function_ref's constructor is unavailable if the argument is not callable. This reverts commit 281703e67ffaee8e26efef86e0df3e145477f4cb. GCC 5.4 bugs are worked around by avoiding use of variable templates. Differential Revision: https://reviews.llvm.org/D88977 --- llvm/include/llvm/ADT/STLExtras.h | 8 +++++++- llvm/lib/AsmParser/LLParser.h | 4 ++-- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 4 +--- llvm/unittests/ADT/FunctionRefTest.cpp | 11 +++++++++++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 4be016b795a0f..0d18164bcce5c 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -193,9 +193,15 @@ class function_ref { template function_ref( Callable &&callable, + // This is not the copy-constructor. std::enable_if_t< !std::is_same>, - function_ref>::value> * = nullptr) + function_ref>::value> * = nullptr, + // Functor must be callable and return a suitable type. + std::enable_if_t::value || + std::is_convertible< + std::result_of_t, Ret>::value> + * = nullptr) : callback(callback_fn::type>), callable(reinterpret_cast(&callable)) {} diff --git a/llvm/lib/AsmParser/LLParser.h b/llvm/lib/AsmParser/LLParser.h index a7fbcdd5abc54..5f581f0d4efb5 100644 --- a/llvm/lib/AsmParser/LLParser.h +++ b/llvm/lib/AsmParser/LLParser.h @@ -166,8 +166,8 @@ namespace llvm { : Context(Context), Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots), BlockAddressPFS(nullptr) {} bool Run( - bool UpgradeDebugInfo, - DataLayoutCallbackTy DataLayoutCallback = [](Module *) {}); + bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback = + [](StringRef) { return None; }); bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots); diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 4d69dd7dcc5d6..15ca3a54da2d3 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -579,9 +579,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { /// \returns true if an error occurred. Error parseBitcodeInto( Module *M, bool ShouldLazyLoadMetadata = false, bool IsImporting = false, - DataLayoutCallbackTy DataLayoutCallback = [](std::string) { - return None; - }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); static uint64_t decodeSignRotatedValue(uint64_t V); diff --git a/llvm/unittests/ADT/FunctionRefTest.cpp b/llvm/unittests/ADT/FunctionRefTest.cpp index 669b87dbf8e41..f084aa7a660b4 100644 --- a/llvm/unittests/ADT/FunctionRefTest.cpp +++ b/llvm/unittests/ADT/FunctionRefTest.cpp @@ -48,4 +48,15 @@ TEST(FunctionRefTest, BadCopy) { ASSERT_EQ(1, X()); } +// Test that overloads on function_refs are resolved as expected. 
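+// A rough intuition for how this resolves: the constructor's new enable_if
+// takes function_ref out of overload resolution unless the callable's return
+// type is convertible to Ret (or Ret is void). An int-returning lambda can
+// therefore initialize function_ref<int()> but not function_ref<const char *()>,
+// and a string literal, which is not callable at all, matches only StringRef.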
+const char *returns(StringRef) { return "not a function"; } +const char *returns(function_ref F) { return "number"; } +const char *returns(function_ref F) { return "string"; } + +TEST(FunctionRefTest, SFINAE) { + EXPECT_EQ("not a function", returns("boo!")); + EXPECT_EQ("number", returns([] { return 42; })); + EXPECT_EQ("string", returns([] { return "hello"; })); +} + } // namespace From 91a98ec11e2e001d01c47286bc1721046beeae62 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 6 Oct 2020 13:50:41 +0200 Subject: [PATCH 244/321] [json] Provide a means to delegate writing a value to another API (Based on D87170 by dsanders) I recently had need to call out to an external API to emit a JSON object as part of one an LLVM tool was emitting. However, our JSON support didn't provide a way to delegate part of the JSON output to that API. Add rawValueBegin() and rawValueEnd() to maintain and check the internal state while something else is writing to the stream. It's the users responsibility to ensure that the resulting JSON output is still valid. Differential Revision: https://reviews.llvm.org/D88902 --- llvm/include/llvm/Support/JSON.h | 16 ++++++++++++- llvm/lib/Support/JSON.cpp | 37 ++++++++++++++++------------- llvm/unittests/Support/JSONTest.cpp | 8 +++++-- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h index 7a45dff6342e4..455673e42e970 100644 --- a/llvm/include/llvm/Support/JSON.h +++ b/llvm/include/llvm/Support/JSON.h @@ -909,6 +909,17 @@ class OStream { Contents(); objectEnd(); } + /// Emit an externally-serialized value. + /// The caller must write exactly one valid JSON value to the provided stream. + /// No validation or formatting of this value occurs. + void rawValue(llvm::function_ref Contents) { + rawValueBegin(); + Contents(OS); + rawValueEnd(); + } + void rawValue(llvm::StringRef Contents) { + rawValue([&](raw_ostream &OS) { OS << Contents; }); + } /// Emit a JavaScript comment associated with the next printed value. /// The string must be valid until the next attribute or value is emitted. /// Comments are not part of standard JSON, and many parsers reject them! @@ -939,8 +950,10 @@ class OStream { void objectEnd(); void attributeBegin(llvm::StringRef Key); void attributeEnd(); + raw_ostream &rawValueBegin(); + void rawValueEnd(); - private: +private: void attributeImpl(llvm::StringRef Key, Block Contents) { attributeBegin(Key); Contents(); @@ -955,6 +968,7 @@ class OStream { Singleton, // Top level, or object attribute. Array, Object, + RawValue, // External code writing a value to OS directly. }; struct State { Context Ctx = Singleton; diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp index d44961b43b894..8471e5818cbbe 100644 --- a/llvm/lib/Support/JSON.cpp +++ b/llvm/lib/Support/JSON.cpp @@ -251,20 +251,13 @@ std::vector sortedElements(const Object &O) { // Prints a one-line version of a value that isn't our main focus. // We interleave writes to OS and JOS, exploiting the lack of extra buffering. // This is OK as we own the implementation. -// FIXME: once we have a "write custom serialized value" API, use it here. -void abbreviate(const Value &V, OStream &JOS, raw_ostream &OS) { +void abbreviate(const Value &V, OStream &JOS) { switch (V.kind()) { case Value::Array: - JOS.array([&] { - if (!V.getAsArray()->empty()) - OS << " ... "; - }); + JOS.rawValue(V.getAsArray()->empty() ? "[]" : "[ ... 
]"); break; case Value::Object: - JOS.object([&] { - if (!V.getAsObject()->empty()) - OS << " ... "; - }); + JOS.rawValue(V.getAsObject()->empty() ? "{}" : "{ ... }"); break; case Value::String: { llvm::StringRef S = *V.getAsString(); @@ -284,19 +277,19 @@ void abbreviate(const Value &V, OStream &JOS, raw_ostream &OS) { // Prints a semi-expanded version of a value that is our main focus. // Array/Object entries are printed, but not recursively as they may be huge. -void abbreviateChildren(const Value &V, OStream &JOS, raw_ostream &OS) { +void abbreviateChildren(const Value &V, OStream &JOS) { switch (V.kind()) { case Value::Array: JOS.array([&] { for (const auto &I : *V.getAsArray()) - abbreviate(I, JOS, OS); + abbreviate(I, JOS); }); break; case Value::Object: JOS.object([&] { for (const auto *KV : sortedElements(*V.getAsObject())) { JOS.attributeBegin(KV->first); - abbreviate(KV->second, JOS, OS); + abbreviate(KV->second, JOS); JOS.attributeEnd(); } }); @@ -322,7 +315,7 @@ void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const { std::string Comment = "error: "; Comment.append(ErrorMessage.data(), ErrorMessage.size()); JOS.comment(Comment); - abbreviateChildren(V, JOS, OS); + abbreviateChildren(V, JOS); }; if (Path.empty()) // We reached our target. return HighlightCurrent(); @@ -339,7 +332,7 @@ void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const { if (FieldName.equals(KV->first)) Recurse(KV->second, Path.drop_back(), Recurse); else - abbreviate(KV->second, JOS, OS); + abbreviate(KV->second, JOS); JOS.attributeEnd(); } }); @@ -354,7 +347,7 @@ void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const { if (Current++ == S.index()) Recurse(V, Path.drop_back(), Recurse); else - abbreviate(V, JOS, OS); + abbreviate(V, JOS); } }); } @@ -893,6 +886,18 @@ void llvm::json::OStream::attributeEnd() { assert(Stack.back().Ctx == Object); } +raw_ostream &llvm::json::OStream::rawValueBegin() { + valueBegin(); + Stack.emplace_back(); + Stack.back().Ctx = RawValue; + return OS; +} + +void llvm::json::OStream::rawValueEnd() { + assert(Stack.back().Ctx == RawValue); + Stack.pop_back(); +} + } // namespace json } // namespace llvm diff --git a/llvm/unittests/Support/JSONTest.cpp b/llvm/unittests/Support/JSONTest.cpp index 6a93f3befc5a3..9f17c98b4db40 100644 --- a/llvm/unittests/Support/JSONTest.cpp +++ b/llvm/unittests/Support/JSONTest.cpp @@ -479,6 +479,7 @@ TEST(JSONTest, Stream) { J.arrayBegin(); J.value(43); J.arrayEnd(); + J.rawValue([](raw_ostream &OS) { OS << "'unverified\nraw value'"; }); }); J.comment("attribute"); J.attributeBegin("bar"); @@ -492,7 +493,8 @@ TEST(JSONTest, Stream) { }; const char *Plain = - R"(/*top* /level*/{"foo":[null,/*element*/42.5,[43]],/*attribute*/"bar":/*attribute value*/{},"baz":"xyz"})"; + R"(/*top* /level*/{"foo":[null,/*element*/42.5,[43],'unverified +raw value'],/*attribute*/"bar":/*attribute value*/{},"baz":"xyz"})"; EXPECT_EQ(Plain, StreamStuff(0)); const char *Pretty = R"(/* top* /level */ { @@ -502,7 +504,9 @@ TEST(JSONTest, Stream) { 42.5, [ 43 - ] + ], + 'unverified +raw value' ], /* attribute */ "bar": /* attribute value */ {}, From 3bba91f64eef15956f589fa446c265a714cc7893 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Mon, 28 Sep 2020 03:57:37 -0700 Subject: [PATCH 245/321] [WebAssembly] Rename Emscripten EH functions Renaming for some Emscripten EH functions has so far been done in wasm-emscripten-finalize tool in Binaryen. 
But recently we decided to make a compilation/linking path that does not rely on wasm-emscripten-finalize for modifications, so here we move that functionality to LLVM. Invoke wrappers are generated in LowerEmscriptenEHSjLj pass, but final wasm types are not available in the IR pass, we need to rename them at the end of the pipeline. This patch also removes uses of `emscripten_longjmp_jmpbuf` in LowerEmscriptenEHSjLj pass, replacing that with `emscripten_longjmp`. `emscripten_longjmp_jmpbuf` is lowered to `emscripten_longjmp`, but previously we generated calls to `emscripten_longjmp_jmpbuf` in LowerEmscriptenEHSjLj pass because it takes `jmp_buf*` instead of `i32`. But we were able use `ptrtoint` to make it use `emscripten_longjmp` directly here. Addresses: https://github.com/WebAssembly/binaryen/issues/3043 https://github.com/WebAssembly/binaryen/issues/3081 Companions: https://github.com/WebAssembly/binaryen/pull/3191 https://github.com/emscripten-core/emscripten/pull/12399 Reviewed By: dschuff, tlively, sbc100 Differential Revision: https://reviews.llvm.org/D88697 --- .../WebAssembly/WebAssemblyAsmPrinter.cpp | 124 ++++++++++++++++-- .../WebAssembly/WebAssemblyAsmPrinter.h | 3 + .../WebAssemblyLowerEmscriptenEHSjLj.cpp | 70 ++++++---- .../WebAssembly/WebAssemblyMCInstLower.cpp | 41 +++--- .../WebAssembly/WebAssemblyTargetMachine.cpp | 4 +- .../CodeGen/WebAssembly/function-bitcasts.ll | 6 +- .../lower-em-ehsjlj-multi-return.ll | 57 ++++++++ .../WebAssembly/lower-em-ehsjlj-options.ll | 57 +++++++- .../CodeGen/WebAssembly/lower-em-sjlj-sret.ll | 2 +- .../test/CodeGen/WebAssembly/lower-em-sjlj.ll | 35 +++-- 10 files changed, 310 insertions(+), 89 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-multi-return.ll diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 96fa13d307290..5e4cbdb429888 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -49,6 +49,8 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" extern cl::opt WasmKeepRegisters; +extern cl::opt EnableEmException; +extern cl::opt EnableEmSjLj; //===----------------------------------------------------------------------===// // Helpers. @@ -81,10 +83,91 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() { return static_cast(TS); } +// Emscripten exception handling helpers +// +// This converts invoke names generated by LowerEmscriptenEHSjLj to real names +// that are expected by JavaScript glue code. The invoke names generated by +// Emscripten JS glue code are based on their argument and return types; for +// example, for a function that takes an i32 and returns nothing, it is +// 'invoke_vi'. But the format of invoke generated by LowerEmscriptenEHSjLj pass +// contains a mangled string generated from their IR types, for example, +// "__invoke_void_%struct.mystruct*_int", because final wasm types are not +// available in the IR pass. So we convert those names to the form that +// Emscripten JS code expects. +// +// Refer to LowerEmscriptenEHSjLj pass for more details. + +// Returns true if the given function name is an invoke name generated by +// LowerEmscriptenEHSjLj pass. 
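+// Mangled invoke names may be quoted in the IR (e.g.
+// "__invoke_void_%struct.mystruct*_int"), so surrounding quotes are stripped
+// before matching the __invoke_ prefix.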
+static bool isEmscriptenInvokeName(StringRef Name) { + if (Name.front() == '"' && Name.back() == '"') + Name = Name.substr(1, Name.size() - 2); + return Name.startswith("__invoke_"); +} + +// Returns a character that represents the given wasm value type in invoke +// signatures. +static char getInvokeSig(wasm::ValType VT) { + switch (VT) { + case wasm::ValType::I32: + return 'i'; + case wasm::ValType::I64: + return 'j'; + case wasm::ValType::F32: + return 'f'; + case wasm::ValType::F64: + return 'd'; + case wasm::ValType::V128: + return 'V'; + case wasm::ValType::EXNREF: + return 'E'; + case wasm::ValType::EXTERNREF: + return 'X'; + } +} + +// Given the wasm signature, generate the invoke name in the format JS glue code +// expects. +static std::string getEmscriptenInvokeSymbolName(wasm::WasmSignature *Sig) { + assert(Sig->Returns.size() <= 1); + std::string Ret = "invoke_"; + if (!Sig->Returns.empty()) + for (auto VT : Sig->Returns) + Ret += getInvokeSig(VT); + else + Ret += 'v'; + // Invokes' first argument is a pointer to the original function, so skip it + for (unsigned I = 1, E = Sig->Params.size(); I < E; I++) + Ret += getInvokeSig(Sig->Params[I]); + return Ret; +} + //===----------------------------------------------------------------------===// // WebAssemblyAsmPrinter Implementation. //===----------------------------------------------------------------------===// +MCSymbolWasm *WebAssemblyAsmPrinter::getMCSymbolForFunction( + const Function *F, bool EnableEmEH, wasm::WasmSignature *Sig, + bool &InvokeDetected) { + MCSymbolWasm *WasmSym = nullptr; + if (EnableEmEH && isEmscriptenInvokeName(F->getName())) { + assert(Sig); + InvokeDetected = true; + if (Sig->Returns.size() > 1) { + std::string Msg = + "Emscripten EH/SjLj does not support multivalue returns: " + + std::string(F->getName()) + ": " + + WebAssembly::signatureToString(Sig); + report_fatal_error(Msg); + } + WasmSym = cast( + GetExternalSymbolSymbol(getEmscriptenInvokeSymbolName(Sig))); + } else { + WasmSym = cast(getSymbol(F)); + } + return WasmSym; +} + void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) { for (auto &It : OutContext.getSymbols()) { // Emit a .globaltype and .eventtype declaration. @@ -95,6 +178,7 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) { getTargetStreamer()->emitEventType(Sym); } + DenseSet InvokeSymbols; for (const auto &F : M) { if (F.isIntrinsic()) continue; @@ -104,31 +188,46 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) { SmallVector Results; SmallVector Params; computeSignatureVTs(F.getFunctionType(), &F, F, TM, Params, Results); - auto *Sym = cast(getSymbol(&F)); + // At this point these MCSymbols may or may not have been created already + // and thus also contain a signature, but we need to get the signature + // anyway here in case it is an invoke that has not yet been created. We + // will discard it later if it turns out not to be necessary. + auto Signature = signatureFromMVTs(Results, Params); + bool InvokeDetected = false; + auto *Sym = getMCSymbolForFunction(&F, EnableEmException || EnableEmSjLj, + Signature.get(), InvokeDetected); + + // Multiple functions can be mapped to the same invoke symbol. For + // example, two IR functions '__invoke_void_i8*' and '__invoke_void_i32' + // are both mapped to '__invoke_vi'. We keep them in a set once we emit an + // Emscripten EH symbol so we don't emit the same symbol twice. 
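+    // InvokeSymbols.insert returns an {iterator, bool} pair whose second
+    // member is false when Sym was already present, so a repeated invoke
+    // symbol takes the continue below and is emitted only once.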
+ if (InvokeDetected && !InvokeSymbols.insert(Sym).second) + continue; + Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); if (!Sym->getSignature()) { - auto Signature = signatureFromMVTs(Results, Params); Sym->setSignature(Signature.get()); addSignature(std::move(Signature)); + } else { + // This symbol has already been created and had a signature. Discard it. + Signature.reset(); } - // FIXME: this was originally intended for post-linking and was only used - // for imports that were only called indirectly (i.e. s2wasm could not - // infer the type from a call). With object files it applies to all - // imports. so fix the names and the tests, or rethink how import - // delcarations work in asm files. + getTargetStreamer()->emitFunctionType(Sym); - if (TM.getTargetTriple().isOSBinFormatWasm() && - F.hasFnAttribute("wasm-import-module")) { + if (F.hasFnAttribute("wasm-import-module")) { StringRef Name = F.getFnAttribute("wasm-import-module").getValueAsString(); Sym->setImportModule(storeName(Name)); getTargetStreamer()->emitImportModule(Sym, Name); } - if (TM.getTargetTriple().isOSBinFormatWasm() && - F.hasFnAttribute("wasm-import-name")) { + if (F.hasFnAttribute("wasm-import-name")) { + // If this is a converted Emscripten EH/SjLj symbol, we shouldn't use + // the original function name but the converted symbol name. StringRef Name = - F.getFnAttribute("wasm-import-name").getValueAsString(); + InvokeDetected + ? Sym->getName() + : F.getFnAttribute("wasm-import-name").getValueAsString(); Sym->setImportName(storeName(Name)); getTargetStreamer()->emitImportName(Sym, Name); } @@ -304,7 +403,6 @@ void WebAssemblyAsmPrinter::emitFunctionBodyStart() { addSignature(std::move(Signature)); WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); - // FIXME: clean up how params and results are emitted (use signatures) getTargetStreamer()->emitFunctionType(WasmSym); // Emit the function index. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h index d9281568638d7..7a6a3247a19fb 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h @@ -77,6 +77,9 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter { MVT getRegType(unsigned RegNo) const; std::string regToString(const MachineOperand &MO); WebAssemblyTargetStreamer *getTargetStreamer(); + MCSymbolWasm *getMCSymbolForFunction(const Function *F, bool EnableEmEH, + wasm::WasmSignature *Sig, + bool &InvokeDetected); }; } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index 24eeafbbed7af..beefbc12c89af 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -140,8 +140,7 @@ /// 1) Lower /// longjmp(buf, value) /// into -/// emscripten_longjmp_jmpbuf(buf, value) -/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later. +/// emscripten_longjmp(buf, value) /// /// In case calls to setjmp() exists /// @@ -196,14 +195,9 @@ /// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to /// each setjmp callsite. Label 0 means this longjmp buffer does not /// correspond to one of the setjmp callsites in this function, so in this -/// case we just chain the longjmp to the caller. (Here we call -/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf. 
-/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while -/// emscripten_longjmp takes an int. Both of them will eventually be lowered -/// to emscripten_longjmp in s2wasm, but here we need two signatures - we -/// can't translate an int value to a jmp_buf.) -/// Label -1 means no longjmp occurred. Otherwise we jump to the right -/// post-setjmp BB based on the label. +/// case we just chain the longjmp to the caller. Label -1 means no longjmp +/// occurred. Otherwise we jump to the right post-setjmp BB based on the +/// label. /// ///===----------------------------------------------------------------------===// @@ -241,7 +235,6 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { Function *ResumeF = nullptr; Function *EHTypeIDF = nullptr; Function *EmLongjmpF = nullptr; - Function *EmLongjmpJmpbufF = nullptr; Function *SaveSetjmpF = nullptr; Function *TestSetjmpF = nullptr; @@ -642,6 +635,30 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { } } +// Replace uses of longjmp with emscripten_longjmp. emscripten_longjmp takes +// arguments of type {i32, i32} and longjmp takes {jmp_buf*, i32}, so we need a +// ptrtoint instruction here to make the type match. jmp_buf* will eventually be +// lowered to i32 in the wasm backend. +static void replaceLongjmpWithEmscriptenLongjmp(Function *LongjmpF, + Function *EmLongjmpF) { + SmallVector ToErase; + LLVMContext &C = LongjmpF->getParent()->getContext(); + IRBuilder<> IRB(C); + for (User *U : LongjmpF->users()) { + auto *CI = dyn_cast(U); + if (!CI) + report_fatal_error("Does not support indirect calls to longjmp"); + IRB.SetInsertPoint(CI); + Value *Jmpbuf = + IRB.CreatePtrToInt(CI->getArgOperand(0), IRB.getInt32Ty(), "jmpbuf"); + IRB.CreateCall(EmLongjmpF, {Jmpbuf, CI->getArgOperand(1)}); + ToErase.push_back(CI); + } + + for (auto *I : ToErase) + I->eraseFromParent(); +} + bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n"); @@ -654,6 +671,10 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty(); bool DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed); + if ((EnableEH || DoSjLj) && + Triple(M.getTargetTriple()).getArch() == Triple::wasm64) + report_fatal_error("Emscripten EH/SjLj is not supported with wasm64 yet"); + auto *TPC = getAnalysisIfAvailable(); assert(TPC && "Expected a TargetPassConfig"); auto &TM = TPC->getTM(); @@ -696,22 +717,21 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { if (DoSjLj) { Changed = true; // We have setjmp or longjmp somewhere - if (LongjmpF) { - // Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is - // defined in JS code - EmLongjmpJmpbufF = getEmscriptenFunction(LongjmpF->getFunctionType(), - "emscripten_longjmp_jmpbuf", &M); - LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF); - } + // Register emscripten_longjmp function + FunctionType *FTy = FunctionType::get( + IRB.getVoidTy(), {IRB.getInt32Ty(), IRB.getInt32Ty()}, false); + EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M); + + if (LongjmpF) + replaceLongjmpWithEmscriptenLongjmp(LongjmpF, EmLongjmpF); if (SetjmpF) { // Register saveSetjmp function FunctionType *SetjmpFTy = SetjmpF->getFunctionType(); - FunctionType *FTy = - FunctionType::get(Type::getInt32PtrTy(C), - {SetjmpFTy->getParamType(0), IRB.getInt32Ty(), - Type::getInt32PtrTy(C), IRB.getInt32Ty()}, - false); + FTy = 
FunctionType::get(Type::getInt32PtrTy(C), + {SetjmpFTy->getParamType(0), IRB.getInt32Ty(), + Type::getInt32PtrTy(C), IRB.getInt32Ty()}, + false); SaveSetjmpF = getEmscriptenFunction(FTy, "saveSetjmp", &M); // Register testSetjmp function @@ -720,10 +740,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}, false); TestSetjmpF = getEmscriptenFunction(FTy, "testSetjmp", &M); - FTy = FunctionType::get(IRB.getVoidTy(), - {IRB.getInt32Ty(), IRB.getInt32Ty()}, false); - EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M); - // Only traverse functions that uses setjmp in order not to insert // unnecessary prep / cleanup code in every function SmallPtrSet SetjmpUsers; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 304dca2ebfe4a..7774a0d71be3a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -38,29 +38,34 @@ cl::opt " instruction output for test purposes only."), cl::init(false)); +extern cl::opt EnableEmException; +extern cl::opt EnableEmSjLj; + static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI); MCSymbol * WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { const GlobalValue *Global = MO.getGlobal(); - auto *WasmSym = cast(Printer.getSymbol(Global)); - - if (const auto *FuncTy = dyn_cast(Global->getValueType())) { - const MachineFunction &MF = *MO.getParent()->getParent()->getParent(); - const TargetMachine &TM = MF.getTarget(); - const Function &CurrentFunc = MF.getFunction(); - - SmallVector ResultMVTs; - SmallVector ParamMVTs; - const auto *const F = dyn_cast(Global); - computeSignatureVTs(FuncTy, F, CurrentFunc, TM, ParamMVTs, ResultMVTs); - - auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs); - WasmSym->setSignature(Signature.get()); - Printer.addSignature(std::move(Signature)); - WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); - } - + if (!isa(Global)) + return cast(Printer.getSymbol(Global)); + + const auto *FuncTy = cast(Global->getValueType()); + const MachineFunction &MF = *MO.getParent()->getParent()->getParent(); + const TargetMachine &TM = MF.getTarget(); + const Function &CurrentFunc = MF.getFunction(); + + SmallVector ResultMVTs; + SmallVector ParamMVTs; + const auto *const F = dyn_cast(Global); + computeSignatureVTs(FuncTy, F, CurrentFunc, TM, ParamMVTs, ResultMVTs); + auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs); + + bool InvokeDetected = false; + auto *WasmSym = Printer.getMCSymbolForFunction( + F, EnableEmException || EnableEmSjLj, Signature.get(), InvokeDetected); + WasmSym->setSignature(Signature.get()); + Printer.addSignature(std::move(Signature)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); return WasmSym; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 97d9e00b7b239..78acc2a341562 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -34,13 +34,13 @@ using namespace llvm; #define DEBUG_TYPE "wasm" // Emscripten's asm.js-style exception handling -static cl::opt EnableEmException( +cl::opt EnableEmException( "enable-emscripten-cxx-exceptions", cl::desc("WebAssembly Emscripten-style exception handling"), cl::init(false)); // Emscripten's asm.js-style setjmp/longjmp 
handling -static cl::opt EnableEmSjLj( +cl::opt EnableEmSjLj( "enable-emscripten-sjlj", cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"), cl::init(false)); diff --git a/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll b/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll index 528d7d8ddc053..8ff0a4d0d94ad 100644 --- a/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll +++ b/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll @@ -153,12 +153,12 @@ define void @test_argument() { ; CHECK-LABEL: test_invoke: ; CHECK: i32.const $push[[L1:[0-9]+]]=, call_func{{$}} ; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, has_i32_ret{{$}} -; CHECK-NEXT: call "__invoke_void_i32()*", $pop[[L1]], $pop[[L0]]{{$}} +; CHECK-NEXT: call invoke_vi, $pop[[L1]], $pop[[L0]]{{$}} ; CHECK: i32.const $push[[L3:[0-9]+]]=, call_func{{$}} ; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, has_i32_arg{{$}} -; CHECK-NEXT: call "__invoke_void_i32()*", $pop[[L3]], $pop[[L2]]{{$}} +; CHECK-NEXT: call invoke_vi, $pop[[L3]], $pop[[L2]]{{$}} ; CHECK: i32.const $push[[L4:[0-9]+]]=, .Lhas_i32_arg_bitcast.2{{$}} -; CHECK-NEXT: call __invoke_void, $pop[[L4]]{{$}} +; CHECK-NEXT: call invoke_v, $pop[[L4]]{{$}} declare i32 @personality(...) define void @test_invoke() personality i32 (...)* @personality { entry: diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-multi-return.ll b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-multi-return.ll new file mode 100644 index 0000000000000..3477388f6969e --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-multi-return.ll @@ -0,0 +1,57 @@ +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -mattr=+multivalue 2>&1 | FileCheck %s --check-prefix=EH +; RUN: not --crash llc < %s -enable-emscripten-sjlj -mattr=+multivalue 2>&1 | FileCheck %s --check-prefix=SJLJ + +; Currently multivalue returning functions are not supported in Emscripten EH / +; SjLj. Make sure they error out. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +%struct.__jmp_buf_tag = type { [6 x i32], i32, [32 x i32] } + +define void @exception() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + invoke {i32, i32} @foo(i32 3) + to label %try.cont unwind label %lpad + +lpad: ; preds = %entry + %1 = landingpad { i8*, i32 } + catch i8* null + %2 = extractvalue { i8*, i32 } %1, 0 + %3 = extractvalue { i8*, i32 } %1, 1 + %4 = call i8* @__cxa_begin_catch(i8* %2) #2 + call void @__cxa_end_catch() + br label %try.cont + +try.cont: ; preds = %entry, %lpad + ret void +} + +define void @setjmp_longjmp() { +entry: + %buf = alloca [1 x %struct.__jmp_buf_tag], align 16 + %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0 + %call = call i32 @setjmp(%struct.__jmp_buf_tag* %arraydecay) #0 + %arraydecay1 = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0 + call {i32, i32} @foo(i32 3) + call void @longjmp(%struct.__jmp_buf_tag* %arraydecay1, i32 1) #1 + unreachable +} + +declare {i32, i32} @foo(i32) +declare i32 @__gxx_personality_v0(...) 
+declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() +; Function Attrs: returns_twice +declare i32 @setjmp(%struct.__jmp_buf_tag*) #0 +; Function Attrs: noreturn +declare void @longjmp(%struct.__jmp_buf_tag*, i32) #1 +declare i8* @malloc(i32) +declare void @free(i8*) + +attributes #0 = { returns_twice } +attributes #1 = { noreturn } +attributes #2 = { nounwind } + +; EH: LLVM ERROR: Emscripten EH/SjLj does not support multivalue returns +; SJLJ: LLVM ERROR: Emscripten EH/SjLj does not support multivalue returns diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll index 46ae9a194f07c..3428d5c08af3f 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll @@ -1,20 +1,30 @@ ; RUN: llc < %s -enable-emscripten-cxx-exceptions | FileCheck %s --check-prefix=EH ; RUN: llc < %s -enable-emscripten-sjlj | FileCheck %s --check-prefix=SJLJ ; RUN: llc < %s | FileCheck %s --check-prefix=NONE +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -mtriple=wasm64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=WASM64-EH +; RUN: not --crash llc < %s -enable-emscripten-sjlj -mtriple=wasm64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=WASM64-SJLJ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" %struct.__jmp_buf_tag = type { [6 x i32], i32, [32 x i32] } -define hidden void @exception() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @exception() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; EH-LABEL: type exception,@function ; NONE-LABEL: type exception,@function entry: - invoke void @foo() + invoke void @foo(i32 3) + to label %invoke.cont unwind label %lpad +; EH: call invoke_vi +; EH-NOT: call __invoke_void_i32 +; NONE: call foo + +invoke.cont: + invoke void @bar() to label %try.cont unwind label %lpad -; EH: call __invoke_void -; NONE: call foo +; EH: call invoke_v +; EH-NOT: call __invoke_void +; NONE: call bar lpad: ; preds = %entry %0 = landingpad { i8*, i32 } @@ -29,7 +39,7 @@ try.cont: ; preds = %entry, %lpad ret void } -define hidden void @setjmp_longjmp() { +define void @setjmp_longjmp() { ; SJLJ-LABEL: type setjmp_longjmp,@function ; NONE-LABEL: type setjmp_longjmp,@function entry: @@ -40,12 +50,30 @@ entry: call void @longjmp(%struct.__jmp_buf_tag* %arraydecay1, i32 1) #1 unreachable ; SJLJ: call saveSetjmp +; SJLJ: i32.const emscripten_longjmp +; SJLJ-NOT: i32.const emscripten_longjmp_jmpbuf +; SJLJ: call invoke_vii +; SJLJ-NOT: call "__invoke_void_%struct.__jmp_buf_tag*_i32" ; SJLJ: call testSetjmp + ; NONE: call setjmp ; NONE: call longjmp } -declare void @foo() +; Tests whether a user function with 'invoke_' prefix can be used +declare void @invoke_ignoreme() +define void @test_invoke_ignoreme() { +; EH-LABEL: type test_invoke_ignoreme,@function +; SJLJ-LABEL: type test_invoke_ignoreme,@function +entry: + call void @invoke_ignoreme() +; EH: call invoke_ignoreme +; SJLJ: call invoke_ignoreme + ret void +} + +declare void @foo(i32) +declare void @bar() declare i32 @__gxx_personality_v0(...) 
declare i8* @__cxa_begin_catch(i8*) declare void @__cxa_end_catch() @@ -59,3 +87,20 @@ declare void @free(i8*) attributes #0 = { returns_twice } attributes #1 = { noreturn } attributes #2 = { nounwind } + +; EH: .functype invoke_vi (i32, i32) -> () +; EH: .import_module invoke_vi, env +; EH: .import_name invoke_vi, invoke_vi +; EH-NOT: .functype __invoke_void_i32 +; EH-NOT: .import_module __invoke_void_i32 +; EH-NOT: .import_name __invoke_void_i32 + +; SJLJ: .functype emscripten_longjmp (i32, i32) -> () +; SJLJ: .import_module emscripten_longjmp, env +; SJLJ: .import_name emscripten_longjmp, emscripten_longjmp +; SJLJ-NOT: .functype emscripten_longjmp_jmpbuf +; SJLJ-NOT: .import_module emscripten_longjmp_jmpbuf +; SJLJ-NOT: .import_name emscripten_longjmp_jmpbuf + +; WASM64-EH: LLVM ERROR: Emscripten EH/SjLj is not supported with wasm64 yet +; WASM64-SJLJ: LLVM ERROR: Emscripten EH/SjLj is not supported with wasm64 yet diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-sret.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-sret.ll index eab3b53d578d0..aa5d3cc3d3a12 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-sret.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-sret.ll @@ -19,7 +19,7 @@ entry: ; It needs to be the first argument (that's what we're testing here) ; CHECK: i32.const $push[[FPTR:[0-9]+]]=, returns_struct ; This is the sret stack region (as an offset from the stack pointer local) - ; CHECK: call "__invoke_{i32.i32}", $pop[[FPTR]] + ; CHECK: call invoke_vi, $pop[[FPTR]] %ret = call {i32, i32} @returns_struct() ret {i32, i32} %ret } diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll index 9d21ab5e78412..d5247b7da0ff3 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll @@ -36,8 +36,9 @@ entry: ; CHECK: entry.split: ; CHECK-NEXT: phi i32 [ 0, %entry ], [ %[[LONGJMP_RESULT:.*]], %if.end ] ; CHECK-NEXT: %[[ARRAYDECAY1:.*]] = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %[[BUF]], i32 0, i32 0 +; CHECK-NEXT: %[[JMPBUF:.*]] = ptrtoint %struct.__jmp_buf_tag* %[[ARRAYDECAY1]] to i32 ; CHECK-NEXT: store i32 0, i32* @__THREW__ -; CHECK-NEXT: call cc{{.*}} void @"__invoke_void_%struct.__jmp_buf_tag*_i32"(void (%struct.__jmp_buf_tag*, i32)* @emscripten_longjmp_jmpbuf, %struct.__jmp_buf_tag* %[[ARRAYDECAY1]], i32 1) +; CHECK-NEXT: call cc{{.*}} void @__invoke_void_i32_i32(void (i32, i32)* @emscripten_longjmp, i32 %[[JMPBUF]], i32 1) ; CHECK-NEXT: %[[__THREW__VAL:.*]] = load i32, i32* @__THREW__ ; CHECK-NEXT: store i32 0, i32* @__THREW__ ; CHECK-NEXT: %[[CMP0:.*]] = icmp ne i32 %__THREW__.val, 0 @@ -187,8 +188,8 @@ entry: %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0 call void @longjmp(%struct.__jmp_buf_tag* %arraydecay, i32 5) #1 unreachable -; CHECK: %[[ARRAYDECAY:.*]] = getelementptr inbounds -; CHECK-NEXT: call void @emscripten_longjmp_jmpbuf(%struct.__jmp_buf_tag* %[[ARRAYDECAY]], i32 5) +; CHECK: %[[JMPBUF:.*]] = ptrtoint +; CHECK-NEXT: call void @emscripten_longjmp(i32 %[[JMPBUF]], i32 5) } ; Test inline asm handling @@ -224,7 +225,7 @@ entry: @buffer = global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16 define void @longjmp_only() { entry: - ; CHECK: call void @emscripten_longjmp_jmpbuf + ; CHECK: call void @emscripten_longjmp call void @longjmp(%struct.__jmp_buf_tag* getelementptr inbounds ([1 x %struct.__jmp_buf_tag], [1 x 
%struct.__jmp_buf_tag]* @buffer, i32 0, i32 0), i32 1) #1 unreachable } @@ -265,28 +266,24 @@ declare void @free(i8*) ; CHECK-DAG: declare void @setTempRet0(i32) ; CHECK-DAG: declare i32* @saveSetjmp(%struct.__jmp_buf_tag*, i32, i32*, i32) ; CHECK-DAG: declare i32 @testSetjmp(i32, i32*, i32) -; CHECK-DAG: declare void @emscripten_longjmp_jmpbuf(%struct.__jmp_buf_tag*, i32) ; CHECK-DAG: declare void @emscripten_longjmp(i32, i32) ; CHECK-DAG: declare void @__invoke_void(void ()*) -; CHECK-DAG: declare void @"__invoke_void_%struct.__jmp_buf_tag*_i32"(void (%struct.__jmp_buf_tag*, i32)*, %struct.__jmp_buf_tag*, i32) attributes #0 = { returns_twice } attributes #1 = { noreturn } attributes #2 = { nounwind } attributes #3 = { allocsize(0) } -; CHECK: attributes #{{[0-9]+}} = { nounwind "wasm-import-module"="env" "wasm-import-name"="getTempRet0" } -; CHECK: attributes #{{[0-9]+}} = { nounwind "wasm-import-module"="env" "wasm-import-name"="setTempRet0" } -; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__resumeException" } -; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="llvm_eh_typeid_for" } -; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__invoke_void" } -; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__cxa_find_matching_catch_3" } -; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="emscripten_longjmp_jmpbuf" } -; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="saveSetjmp" } -; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="testSetjmp" } -; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="emscripten_longjmp" } -; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__invoke_i8*_i32_%struct.__jmp_buf_tag*" } -; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__invoke_void_%struct.__jmp_buf_tag*_i32" } -; CHECK: attributes #[[ALLOCSIZE_ATTR]] = { allocsize(1) } +; CHECK-DAG: attributes #{{[0-9]+}} = { nounwind "wasm-import-module"="env" "wasm-import-name"="getTempRet0" } +; CHECK-DAG: attributes #{{[0-9]+}} = { nounwind "wasm-import-module"="env" "wasm-import-name"="setTempRet0" } +; CHECK-DAG: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__resumeException" } +; CHECK-DAG: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="llvm_eh_typeid_for" } +; CHECK-DAG: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__invoke_void" } +; CHECK-DAG: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__cxa_find_matching_catch_3" } +; CHECK-DAG: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="saveSetjmp" } +; CHECK-DAG: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="testSetjmp" } +; CHECK-DAG: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="emscripten_longjmp" } +; CHECK-DAG: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__invoke_i8*_i32_%struct.__jmp_buf_tag*" } +; CHECK-DAG: attributes #[[ALLOCSIZE_ATTR]] = { allocsize(1) } !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!0} From 4aa217160e5f06a96c6effc4950c3b402374de58 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Tue, 6 Oct 2020 23:01:20 -0700 Subject: [PATCH 246/321] [mlir][CAPI] Attribute set/remove on operations. 
* New functions: mlirOperationSetAttributeByName, mlirOperationRemoveAttributeByName * Also adds some *IsNull checks and standardizes the rest to use "static inline" form, which makes them all non-opaque and not part of the ABI (which is desirable). * Changes needed to resolve TODOs in npcomp PyTorch capture. Differential Revision: https://reviews.llvm.org/D88946 --- mlir/include/mlir-c/IR.h | 32 +++++++++++++++------ mlir/lib/CAPI/IR/IR.cpp | 20 +++++++------- mlir/test/CAPI/ir.c | 60 ++++++++++++++++++++++++++++++++++------ 3 files changed, 85 insertions(+), 27 deletions(-) diff --git a/mlir/include/mlir-c/IR.h b/mlir/include/mlir-c/IR.h index c751da804097d..b2a17869e2b39 100644 --- a/mlir/include/mlir-c/IR.h +++ b/mlir/include/mlir-c/IR.h @@ -92,7 +92,9 @@ MlirContext mlirContextCreate(); int mlirContextEqual(MlirContext ctx1, MlirContext ctx2); /** Checks whether a context is null. */ -inline int mlirContextIsNull(MlirContext context) { return !context.ptr; } +static inline int mlirContextIsNull(MlirContext context) { + return !context.ptr; +} /** Takes an MLIR context owned by the caller and destroys it. */ void mlirContextDestroy(MlirContext context); @@ -127,7 +129,9 @@ MlirDialect mlirContextGetOrLoadDialect(MlirContext context, MlirContext mlirDialectGetContext(MlirDialect dialect); /** Checks if the dialect is null. */ -int mlirDialectIsNull(MlirDialect dialect); +static inline int mlirDialectIsNull(MlirDialect dialect) { + return !dialect.ptr; +} /** Checks if two dialects that belong to the same context are equal. Dialects * from different contexts will not compare equal. */ @@ -171,7 +175,7 @@ MlirModule mlirModuleCreateParse(MlirContext context, const char *module); MlirContext mlirModuleGetContext(MlirModule module); /** Checks whether a module is null. */ -inline int mlirModuleIsNull(MlirModule module) { return !module.ptr; } +static inline int mlirModuleIsNull(MlirModule module) { return !module.ptr; } /** Takes a module owned by the caller and deletes it. */ void mlirModuleDestroy(MlirModule module); @@ -235,7 +239,7 @@ MlirOperation mlirOperationCreate(const MlirOperationState *state); void mlirOperationDestroy(MlirOperation op); /** Checks whether the underlying operation is null. */ -int mlirOperationIsNull(MlirOperation op); +static inline int mlirOperationIsNull(MlirOperation op) { return !op.ptr; } /** Returns the number of regions attached to the given operation. */ intptr_t mlirOperationGetNumRegions(MlirOperation op); @@ -275,6 +279,15 @@ MlirNamedAttribute mlirOperationGetAttribute(MlirOperation op, intptr_t pos); MlirAttribute mlirOperationGetAttributeByName(MlirOperation op, const char *name); +/** Sets an attribute by name, replacing the existing if it exists or + * adding a new one otherwise. */ +void mlirOperationSetAttributeByName(MlirOperation op, const char *name, + MlirAttribute attr); + +/** Removes an attribute by name. Returns 0 if the attribute was not found + * and !0 if removed. */ +int mlirOperationRemoveAttributeByName(MlirOperation op, const char *name); + /** Prints an operation by sending chunks of the string representation and * forwarding `userData to `callback`. Note that the callback may be called * several times with consecutive chunks of the string. */ @@ -295,7 +308,7 @@ MlirRegion mlirRegionCreate(); void mlirRegionDestroy(MlirRegion region); /** Checks whether a region is null. 
*/ -int mlirRegionIsNull(MlirRegion region); +static inline int mlirRegionIsNull(MlirRegion region) { return !region.ptr; } /** Gets the first block in the region. */ MlirBlock mlirRegionGetFirstBlock(MlirRegion region); @@ -333,7 +346,7 @@ MlirBlock mlirBlockCreate(intptr_t nArgs, MlirType *args); void mlirBlockDestroy(MlirBlock block); /** Checks whether a block is null. */ -int mlirBlockIsNull(MlirBlock block); +static inline int mlirBlockIsNull(MlirBlock block) { return !block.ptr; } /** Returns the block immediately following the given block in its parent * region. */ @@ -381,6 +394,9 @@ void mlirBlockPrint(MlirBlock block, MlirStringCallback callback, /* Value API. */ /*============================================================================*/ +/** Returns whether the value is null. */ +static inline int mlirValueIsNull(MlirValue value) { return !value.ptr; } + /** Returns the type of the value. */ MlirType mlirValueGetType(MlirValue value); @@ -401,7 +417,7 @@ MlirType mlirTypeParseGet(MlirContext context, const char *type); MlirContext mlirTypeGetContext(MlirType type); /** Checks whether a type is null. */ -inline int mlirTypeIsNull(MlirType type) { return !type.ptr; } +static inline int mlirTypeIsNull(MlirType type) { return !type.ptr; } /** Checks if two types are equal. */ int mlirTypeEqual(MlirType t1, MlirType t2); @@ -425,7 +441,7 @@ MlirAttribute mlirAttributeParseGet(MlirContext context, const char *attr); MlirContext mlirAttributeGetContext(MlirAttribute attribute); /** Checks whether an attribute is null. */ -inline int mlirAttributeIsNull(MlirAttribute attr) { return !attr.ptr; } +static inline int mlirAttributeIsNull(MlirAttribute attr) { return !attr.ptr; } /** Checks if two attributes are equal. */ int mlirAttributeEqual(MlirAttribute a1, MlirAttribute a2); diff --git a/mlir/lib/CAPI/IR/IR.cpp b/mlir/lib/CAPI/IR/IR.cpp index 359ee69708eb7..45cd009bddc0d 100644 --- a/mlir/lib/CAPI/IR/IR.cpp +++ b/mlir/lib/CAPI/IR/IR.cpp @@ -66,10 +66,6 @@ MlirContext mlirDialectGetContext(MlirDialect dialect) { return wrap(unwrap(dialect)->getContext()); } -int mlirDialectIsNull(MlirDialect dialect) { - return unwrap(dialect) == nullptr; -} - int mlirDialectEqual(MlirDialect dialect1, MlirDialect dialect2) { return unwrap(dialect1) == unwrap(dialect2); } @@ -215,8 +211,6 @@ MlirOperation mlirOperationCreate(const MlirOperationState *state) { void mlirOperationDestroy(MlirOperation op) { unwrap(op)->erase(); } -int mlirOperationIsNull(MlirOperation op) { return unwrap(op) == nullptr; } - intptr_t mlirOperationGetNumRegions(MlirOperation op) { return static_cast(unwrap(op)->getNumRegions()); } @@ -267,6 +261,16 @@ MlirAttribute mlirOperationGetAttributeByName(MlirOperation op, return wrap(unwrap(op)->getAttr(name)); } +void mlirOperationSetAttributeByName(MlirOperation op, const char *name, + MlirAttribute attr) { + unwrap(op)->setAttr(name, unwrap(attr)); +} + +int mlirOperationRemoveAttributeByName(MlirOperation op, const char *name) { + auto removeResult = unwrap(op)->removeAttr(name); + return removeResult == MutableDictionaryAttr::RemoveResult::Removed; +} + void mlirOperationPrint(MlirOperation op, MlirStringCallback callback, void *userData) { detail::CallbackOstream stream(callback, userData); @@ -328,8 +332,6 @@ void mlirRegionDestroy(MlirRegion region) { delete static_cast(region.ptr); } -int mlirRegionIsNull(MlirRegion region) { return unwrap(region) == nullptr; } - /* ========================================================================== */ /* Block API. 
*/ /* ========================================================================== */ @@ -391,8 +393,6 @@ void mlirBlockInsertOwnedOperationBefore(MlirBlock block, void mlirBlockDestroy(MlirBlock block) { delete unwrap(block); } -int mlirBlockIsNull(MlirBlock block) { return unwrap(block) == nullptr; } - intptr_t mlirBlockGetNumArguments(MlirBlock block) { return static_cast(unwrap(block)->getNumArguments()); } diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c index 18c4e8b085597..8eab4ebb38581 100644 --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -216,7 +216,7 @@ static void printToStderr(const char *str, intptr_t len, void *userData) { fwrite(str, 1, len, stderr); } -static void printFirstOfEach(MlirOperation operation) { +static void printFirstOfEach(MlirContext ctx, MlirOperation operation) { // Assuming we are given a module, go to the first operation of the first // function. MlirRegion region = mlirOperationGetRegion(operation, 0); @@ -227,24 +227,59 @@ static void printFirstOfEach(MlirOperation operation) { operation = mlirBlockGetFirstOperation(block); // In the module we created, the first operation of the first function is an - // "std.dim", which has an attribute an a single result that we can use to + // "std.dim", which has an attribute and a single result that we can use to // test the printing mechanism. mlirBlockPrint(block, printToStderr, NULL); fprintf(stderr, "\n"); + fprintf(stderr, "First operation: "); mlirOperationPrint(operation, printToStderr, NULL); fprintf(stderr, "\n"); - MlirNamedAttribute namedAttr = mlirOperationGetAttribute(operation, 0); - mlirAttributePrint(namedAttr.attribute, printToStderr, NULL); + // Get the attribute by index. + MlirNamedAttribute namedAttr0 = mlirOperationGetAttribute(operation, 0); + fprintf(stderr, "Get attr 0: "); + mlirAttributePrint(namedAttr0.attribute, printToStderr, NULL); fprintf(stderr, "\n"); + // Now re-get the attribute by name. + MlirAttribute attr0ByName = + mlirOperationGetAttributeByName(operation, namedAttr0.name); + fprintf(stderr, "Get attr 0 by name: "); + mlirAttributePrint(attr0ByName, printToStderr, NULL); + fprintf(stderr, "\n"); + + // Get a non-existing attribute and assert that it is null (sanity). + fprintf(stderr, "does_not_exist is null: %d\n", + mlirAttributeIsNull( + mlirOperationGetAttributeByName(operation, "does_not_exist"))); + + // Get result 0 and its type. MlirValue value = mlirOperationGetResult(operation, 0); + fprintf(stderr, "Result 0: "); mlirValuePrint(value, printToStderr, NULL); fprintf(stderr, "\n"); + fprintf(stderr, "Value is null: %d\n", mlirValueIsNull(value)); MlirType type = mlirValueGetType(value); + fprintf(stderr, "Result 0 type: "); mlirTypePrint(type, printToStderr, NULL); fprintf(stderr, "\n"); + + // Set a custom attribute. + mlirOperationSetAttributeByName(operation, "custom_attr", + mlirBoolAttrGet(ctx, 1)); + fprintf(stderr, "Op with set attr: "); + mlirOperationPrint(operation, printToStderr, NULL); + fprintf(stderr, "\n"); + + // Remove the attribute. 
+ fprintf(stderr, "Remove attr: %d\n", + mlirOperationRemoveAttributeByName(operation, "custom_attr")); + fprintf(stderr, "Remove attr again: %d\n", + mlirOperationRemoveAttributeByName(operation, "custom_attr")); + fprintf(stderr, "Removed attr is null: %d\n", + mlirAttributeIsNull( + mlirOperationGetAttributeByName(operation, "custom_attr"))); } /// Creates an operation with a region containing multiple blocks with @@ -884,7 +919,7 @@ int main() { // CHECK: Number of values: 9 // clang-format on - printFirstOfEach(module); + printFirstOfEach(ctx, module); // clang-format off // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[C0]] : memref @@ -896,10 +931,17 @@ int main() { // CHECK: store %[[SUM]], %{{.*}}[%[[I]]] : memref // CHECK: } // CHECK: return - // CHECK: constant 0 : index - // CHECK: 0 : index - // CHECK: constant 0 : index - // CHECK: index + // CHECK: First operation: {{.*}} = constant 0 : index + // CHECK: Get attr 0: 0 : index + // CHECK: Get attr 0 by name: 0 : index + // CHECK: does_not_exist is null: 1 + // CHECK: Result 0: {{.*}} = constant 0 : index + // CHECK: Value is null: 0 + // CHECK: Result 0 type: index + // CHECK: Op with set attr: {{.*}} {custom_attr = true} + // CHECK: Remove attr: 1 + // CHECK: Remove attr again: 0 + // CHECK: Removed attr is null: 1 // clang-format on mlirModuleDestroy(moduleOp); From 19bc894da12a9229b3e8cfb11a0281786f07ab6c Mon Sep 17 00:00:00 2001 From: Fanbo Meng Date: Wed, 7 Oct 2020 13:22:55 -0400 Subject: [PATCH 247/321] [NFC] Add contributors names to CREDITS.TXT --- llvm/CREDITS.TXT | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/llvm/CREDITS.TXT b/llvm/CREDITS.TXT index ec6f9216d18bc..662f4bd6dbc69 100644 --- a/llvm/CREDITS.TXT +++ b/llvm/CREDITS.TXT @@ -337,6 +337,10 @@ N: Michael McCracken E: michael.mccracken@gmail.com D: Line number support for llvmgcc +N: Fanbo Meng +E: fanbo.meng@ibm.com +D: z/OS support + N: Vladimir Merzliakov E: wanderer@rsu.ru D: Test suite fixes for FreeBSD @@ -478,6 +482,10 @@ E: rspencer@reidspencer.com W: http://reidspencer.com/ D: Lots of stuff, see: http://wiki.llvm.org/index.php/User:Reid +N: Abhina Sreeskantharajan +E: Abhina.Sreeskantharajan@ibm.com +D: z/OS support + N: Alp Toker E: alp@nuanti.com W: http://atoker.com/ From 322d0afd875df66b36e4810a2b95c20a8f22ab9b Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Fri, 2 Oct 2020 18:30:53 -0700 Subject: [PATCH 248/321] [llvm][mlir] Promote the experimental reduction intrinsics to be first class intrinsics. This change renames the intrinsics to not have "experimental" in the name. The autoupgrader will handle legacy intrinsics. 
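As a minimal sketch of the upgrade path (mirroring the updated Bitcode/upgrade-vecreduce-intrinsics.ll test; %v, %acc, and %w are placeholder values), calls against the old names, e.g.

    %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v)
    %f = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %acc, <4 x float> %w)

are rewritten on module load to the renamed first-class intrinsics:

    %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
    %f = call float @llvm.vector.reduce.fadd.v4f32(float %acc, <4 x float> %w)

Note that the fadd/fmul variants also drop the ".v2" infix and the redundant scalar-type suffix from the mangled name.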
Relevant ML thread: http://lists.llvm.org/pipermail/llvm-dev/2020-April/140729.html Differential Revision: https://reviews.llvm.org/D88787 --- llvm/docs/LangRef.rst | 156 +++--- llvm/docs/ReleaseNotes.rst | 4 + llvm/include/llvm/CodeGen/BasicTTIImpl.h | 52 +- llvm/include/llvm/IR/Intrinsics.td | 57 +- llvm/lib/Analysis/ConstantFolding.cpp | 72 +-- llvm/lib/CodeGen/ExpandReductions.cpp | 92 ++-- .../SelectionDAG/SelectionDAGBuilder.cpp | 52 +- llvm/lib/IR/AutoUpgrade.cpp | 57 +- llvm/lib/IR/IRBuilder.cpp | 39 +- .../AArch64/AArch64TargetTransformInfo.h | 4 +- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 4 +- llvm/lib/Target/ARM/MVETailPredication.cpp | 2 +- .../InstCombine/InstCombineAddSub.cpp | 17 +- .../Instrumentation/MemorySanitizer.cpp | 14 +- .../CostModel/AArch64/vector-reduce.ll | 174 +++--- .../test/Analysis/CostModel/ARM/reduce-add.ll | 214 ++++---- .../Analysis/CostModel/ARM/reduce-smax.ll | 230 ++++---- .../Analysis/CostModel/ARM/reduce-smin.ll | 230 ++++---- .../Analysis/CostModel/ARM/reduce-umax.ll | 230 ++++---- .../Analysis/CostModel/ARM/reduce-umin.ll | 230 ++++---- .../test/Analysis/CostModel/X86/reduce-add.ll | 374 ++++++------- .../test/Analysis/CostModel/X86/reduce-and.ll | 358 ++++++------ .../Analysis/CostModel/X86/reduce-fmax.ll | 110 ++-- .../Analysis/CostModel/X86/reduce-fmin.ll | 110 ++-- .../test/Analysis/CostModel/X86/reduce-mul.ll | 368 ++++++------- llvm/test/Analysis/CostModel/X86/reduce-or.ll | 358 ++++++------ .../Analysis/CostModel/X86/reduce-smax.ll | 430 +++++++-------- .../Analysis/CostModel/X86/reduce-smin.ll | 430 +++++++-------- .../Analysis/CostModel/X86/reduce-umax.ll | 430 +++++++-------- .../Analysis/CostModel/X86/reduce-umin.ll | 430 +++++++-------- .../test/Analysis/CostModel/X86/reduce-xor.ll | 390 ++++++------- llvm/test/Assembler/invalid-vecreduce.ll | 28 +- .../Bitcode/upgrade-vecreduce-intrinsics.ll | 142 +++-- .../upgrade-vecreduce-intrinsics.ll.bc | Bin 1860 -> 2800 bytes llvm/test/CodeGen/AArch64/aarch64-addv.ll | 24 +- llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | 92 ++-- llvm/test/CodeGen/AArch64/arm64-vabs.ll | 12 +- llvm/test/CodeGen/AArch64/neon-dot-product.ll | 16 +- .../AArch64/sve-fixed-length-fp-reduce.ll | 154 +++--- .../AArch64/sve-fixed-length-int-reduce.ll | 518 +++++++++--------- .../AArch64/vecreduce-add-legalization.ll | 58 +- .../AArch64/vecreduce-and-legalization.ll | 58 +- llvm/test/CodeGen/AArch64/vecreduce-bool.ll | 48 +- .../vecreduce-fadd-legalization-strict.ll | 28 +- .../AArch64/vecreduce-fadd-legalization.ll | 28 +- llvm/test/CodeGen/AArch64/vecreduce-fadd.ll | 32 +- .../vecreduce-fmax-legalization-nan.ll | 28 +- .../AArch64/vecreduce-fmax-legalization.ll | 30 +- .../AArch64/vecreduce-fmin-legalization.ll | 30 +- .../vecreduce-fmul-legalization-strict.ll | 28 +- .../AArch64/vecreduce-propagate-sd-flags.ll | 4 +- .../AArch64/vecreduce-umax-legalization.ll | 62 +-- .../vecreduce-fadd-legalization-soft-float.ll | 16 +- .../ARM/vecreduce-fadd-legalization-strict.ll | 28 +- .../vecreduce-fmax-legalization-soft-float.ll | 16 +- .../vecreduce-fmin-legalization-soft-float.ll | 16 +- .../vecreduce-fmul-legalization-soft-float.ll | 16 +- .../ARM/vecreduce-fmul-legalization-strict.ll | 28 +- .../Generic/expand-experimental-reductions.ll | 74 +-- .../Thumb2/LowOverheadLoops/add_reduce.mir | 4 +- .../cond-vector-reduce-mve-codegen.ll | 10 +- .../LowOverheadLoops/inloop-vpsel-1.mir | 4 +- .../LowOverheadLoops/inloop-vpsel-2.mir | 4 +- .../LowOverheadLoops/invariant-qreg.mir | 6 +- .../lstp-insertion-position.mir | 6 +- 
.../Thumb2/LowOverheadLoops/matrix.mir | 4 +- .../LowOverheadLoops/mve-tail-data-types.ll | 12 +- .../CodeGen/Thumb2/LowOverheadLoops/nested.ll | 10 +- .../LowOverheadLoops/no-vpsel-liveout.mir | 4 +- .../LowOverheadLoops/non-masked-load.mir | 4 +- .../LowOverheadLoops/predicated-liveout.mir | 4 +- .../reductions-vpt-liveout.mir | 14 +- .../Thumb2/LowOverheadLoops/reductions.ll | 28 +- .../Thumb2/LowOverheadLoops/skip-debug.mir | 4 +- .../LowOverheadLoops/tail-pred-reduce.ll | 4 +- .../LowOverheadLoops/tp-multiple-vpst.ll | 8 +- .../LowOverheadLoops/unpredicated-max.mir | 4 +- .../CodeGen/Thumb2/LowOverheadLoops/vaddv.mir | 50 +- .../varying-outer-2d-reduction.ll | 4 +- .../vctp-add-operand-liveout.mir | 4 +- .../Thumb2/LowOverheadLoops/vctp16-reduce.mir | 4 +- .../LowOverheadLoops/vector-arith-codegen.ll | 8 +- .../vector-reduce-mve-tail.ll | 6 +- .../CodeGen/Thumb2/LowOverheadLoops/wlstp.mir | 4 +- .../wrong-liveout-lsr-shift.mir | 4 +- .../wrong-vctp-opcode-liveout.mir | 4 +- .../wrong-vctp-operand-liveout.mir | 4 +- .../Thumb2/mve-gather-scatter-optimisation.ll | 8 +- .../Thumb2/mve-gather-scatter-tailpred.ll | 8 +- llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll | 2 +- llvm/test/CodeGen/Thumb2/mve-vaddv.ll | 42 +- llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll | 132 ++--- .../CodeGen/Thumb2/mve-vecreduce-addpred.ll | 132 ++--- llvm/test/CodeGen/Thumb2/mve-vecreduce-bit.ll | 216 ++++---- .../test/CodeGen/Thumb2/mve-vecreduce-fadd.ll | 58 +- .../CodeGen/Thumb2/mve-vecreduce-fminmax.ll | 188 +++---- .../test/CodeGen/Thumb2/mve-vecreduce-fmul.ll | 60 +- .../CodeGen/Thumb2/mve-vecreduce-loops.ll | 106 ++-- llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll | 156 +++--- .../CodeGen/Thumb2/mve-vecreduce-mlapred.ll | 156 +++--- llvm/test/CodeGen/Thumb2/mve-vecreduce-mul.ll | 72 +-- llvm/test/CodeGen/Thumb2/mve-vmaxv.ll | 88 +-- llvm/test/CodeGen/X86/haddsub.ll | 8 +- llvm/test/CodeGen/X86/pr45378.ll | 4 +- llvm/test/CodeGen/X86/vector-reduce-add.ll | 94 ++-- .../CodeGen/X86/vector-reduce-and-bool.ll | 60 +- .../test/CodeGen/X86/vector-reduce-and-cmp.ll | 88 +-- llvm/test/CodeGen/X86/vector-reduce-and.ll | 88 +-- .../CodeGen/X86/vector-reduce-fadd-fast.ll | 64 +-- llvm/test/CodeGen/X86/vector-reduce-fadd.ll | 64 +-- .../X86/vector-reduce-fmax-fmin-fast.ll | 32 +- .../CodeGen/X86/vector-reduce-fmax-nnan.ll | 40 +- llvm/test/CodeGen/X86/vector-reduce-fmax.ll | 40 +- .../CodeGen/X86/vector-reduce-fmin-nnan.ll | 44 +- llvm/test/CodeGen/X86/vector-reduce-fmin.ll | 36 +- .../CodeGen/X86/vector-reduce-fmul-fast.ll | 64 +-- llvm/test/CodeGen/X86/vector-reduce-fmul.ll | 64 +-- llvm/test/CodeGen/X86/vector-reduce-mul.ll | 88 +-- .../test/CodeGen/X86/vector-reduce-or-bool.ll | 60 +- llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll | 98 ++-- llvm/test/CodeGen/X86/vector-reduce-or.ll | 88 +-- llvm/test/CodeGen/X86/vector-reduce-smax.ll | 88 +-- llvm/test/CodeGen/X86/vector-reduce-smin.ll | 88 +-- llvm/test/CodeGen/X86/vector-reduce-umax.ll | 88 +-- llvm/test/CodeGen/X86/vector-reduce-umin.ll | 88 +-- .../CodeGen/X86/vector-reduce-xor-bool.ll | 60 +- llvm/test/CodeGen/X86/vector-reduce-xor.ll | 88 +-- .../{experimental-reduce.ll => reduce.ll} | 28 +- .../InstCombine/vector-reductions.ll | 76 +-- .../InstSimplify/ConstProp/vecreduce.ll | 180 +++--- .../LoopVectorize/AArch64/pr33053.ll | 2 +- .../AArch64/reduction-small-size.ll | 6 +- .../ARM/mve-gather-scatter-tailpred.ll | 2 +- .../ARM/mve-reduction-predselect.ll | 20 +- .../LoopVectorize/ARM/mve-reduction-types.ll | 26 +- .../LoopVectorize/ARM/mve-reductions.ll | 24 +- 
.../ARM/tail-fold-multiple-icmps.ll | 4 +- .../ARM/tail-folding-not-allowed.ll | 8 +- .../X86/imprecise-through-phis.ll | 2 +- .../X86/invariant-store-vectorization.ll | 2 +- .../LoopVectorize/X86/load-deref-pred.ll | 20 +- .../Transforms/LoopVectorize/X86/pr35432.ll | 2 +- .../Transforms/LoopVectorize/X86/pr42674.ll | 2 +- .../LoopVectorize/X86/reduction-fastmath.ll | 6 +- .../LoopVectorize/X86/strided_load_cost.ll | 2 +- .../LoopVectorize/X86/tail_loop_folding.ll | 2 +- .../LoopVectorize/reduction-inloop-uf4.ll | 8 +- .../LoopVectorize/reduction-inloop.ll | 60 +- .../LoopVectorize/reduction-predselect.ll | 20 +- .../PhaseOrdering/X86/vector-reductions.ll | 10 +- .../SLPVectorizer/AArch64/gather-cost.ll | 2 +- .../SLPVectorizer/AArch64/gather-root.ll | 10 +- .../SLPVectorizer/AArch64/horizontal.ll | 6 +- .../SLPVectorizer/AArch64/transpose.ll | 2 +- .../Transforms/SLPVectorizer/X86/PR35628_1.ll | 2 +- .../Transforms/SLPVectorizer/X86/PR35628_2.ll | 2 +- .../Transforms/SLPVectorizer/X86/PR39774.ll | 4 +- .../Transforms/SLPVectorizer/X86/PR40310.ll | 2 +- .../SLPVectorizer/X86/compare-reduce.ll | 8 +- .../SLPVectorizer/X86/horizontal-list.ll | 52 +- .../SLPVectorizer/X86/horizontal-minmax.ll | 14 +- .../SLPVectorizer/X86/horizontal.ll | 40 +- .../SLPVectorizer/X86/reassociated-loads.ll | 2 +- .../Transforms/SLPVectorizer/X86/reduction.ll | 2 +- .../SLPVectorizer/X86/reduction_loads.ll | 6 +- .../SLPVectorizer/X86/reduction_unrolled.ll | 12 +- .../SLPVectorizer/X86/remark_horcost.ll | 2 +- .../SLPVectorizer/X86/reorder_repeated_ops.ll | 4 +- .../X86/reverse_extract_elements.ll | 8 +- .../SLPVectorizer/X86/scheduling.ll | 2 +- .../SLPVectorizer/X86/undef_vect.ll | 2 +- .../SLPVectorizer/X86/used-reduced-op.ll | 2 +- .../X86/vectorize-reorder-reuse.ll | 6 +- .../include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 8 +- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 28 +- mlir/include/mlir/Dialect/Vector/VectorOps.td | 2 +- .../LLVMIR/CPU/test-vector-reductions-fp.mlir | 20 +- .../CPU/test-vector-reductions-int.mlir | 18 +- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 26 +- .../vector-reduction-to-llvm.mlir | 8 +- .../VectorToLLVM/vector-to-llvm.mlir | 12 +- mlir/test/Target/llvmir-intrinsics.mlir | 60 +- 182 files changed, 5777 insertions(+), 5715 deletions(-) rename llvm/test/Instrumentation/MemorySanitizer/{experimental-reduce.ll => reduce.ll} (66%) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index cd5c9e23b746d..269c3675bf335 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15543,8 +15543,8 @@ should exit, this ``SUB`` is not allowed to wrap. The result is a condition that is used by the conditional branch controlling the loop. -Experimental Vector Reduction Intrinsics ----------------------------------------- +Vector Reduction Intrinsics +--------------------------- Horizontal reductions of vectors can be expressed using the following intrinsics. Each one takes a vector operand as an input and applies its @@ -15552,21 +15552,21 @@ respective operation across all elements of the vector, returning a single scalar result of the same element type. 
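For example (an illustrative snippet using the renamed form introduced below), reducing a constant four-element vector with the integer add reduction yields the scalar sum of its lanes::

      %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
      ; %sum is i32 10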
-'``llvm.experimental.vector.reduce.add.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.add.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %a) - declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %a) + declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a) + declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.add.*``' intrinsics do an integer ``ADD`` +The '``llvm.vector.reduce.add.*``' intrinsics do an integer ``ADD`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. @@ -15574,34 +15574,34 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of integer values. -'``llvm.experimental.vector.reduce.v2.fadd.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.fadd.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %start_value, <4 x float> %a) - declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double %start_value, <2 x double> %a) + declare float @llvm.vector.reduce.fadd.v4f32(float %start_value, <4 x float> %a) + declare double @llvm.vector.reduce.fadd.v2f64(double %start_value, <2 x double> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.v2.fadd.*``' intrinsics do a floating-point +The '``llvm.vector.reduce.fadd.*``' intrinsics do a floating-point ``ADD`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. -If the intrinsic call has the 'reassoc' or 'fast' flags set, then the -reduction will not preserve the associativity of an equivalent scalarized -counterpart. Otherwise the reduction will be *ordered*, thus implying that -the operation respects the associativity of a scalarized reduction. That is, the -reduction begins with the start value and performs an fadd operation with consecutively -increasing vector element indices. See the following pseudocode: +If the intrinsic call has the 'reassoc' flag set, then the reduction will not +preserve the associativity of an equivalent scalarized counterpart. Otherwise +the reduction will be *sequential*, thus implying that the operation respects +the associativity of a scalarized reduction. That is, the reduction begins with +the start value and performs an fadd operation with consecutively increasing +vector element indices. 
See the following pseudocode: :: - float ordered_fadd(start_value, input_vector) + float sequential_fadd(start_value, input_vector) result = start_value for i = 0 to length(input_vector) result = result + input_vector[i] @@ -15619,25 +15619,25 @@ Examples: :: - %unord = call reassoc float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %input) ; unordered reduction - %ord = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %start_value, <4 x float> %input) ; ordered reduction + %unord = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %input) ; relaxed reduction + %ord = call float @llvm.vector.reduce.fadd.v4f32(float %start_value, <4 x float> %input) ; sequential reduction -'``llvm.experimental.vector.reduce.mul.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.mul.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %a) - declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %a) + declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a) + declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.mul.*``' intrinsics do an integer ``MUL`` +The '``llvm.vector.reduce.mul.*``' intrinsics do an integer ``MUL`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. @@ -15645,34 +15645,34 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of integer values. -'``llvm.experimental.vector.reduce.v2.fmul.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.fmul.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %start_value, <4 x float> %a) - declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double %start_value, <2 x double> %a) + declare float @llvm.vector.reduce.fmul.v4f32(float %start_value, <4 x float> %a) + declare double @llvm.vector.reduce.fmul.v2f64(double %start_value, <2 x double> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.v2.fmul.*``' intrinsics do a floating-point +The '``llvm.vector.reduce.fmul.*``' intrinsics do a floating-point ``MUL`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. -If the intrinsic call has the 'reassoc' or 'fast' flags set, then the -reduction will not preserve the associativity of an equivalent scalarized -counterpart. Otherwise the reduction will be *ordered*, thus implying that -the operation respects the associativity of a scalarized reduction. That is, the -reduction begins with the start value and performs an fmul operation with consecutively -increasing vector element indices. See the following pseudocode: +If the intrinsic call has the 'reassoc' flag set, then the reduction will not +preserve the associativity of an equivalent scalarized counterpart. Otherwise +the reduction will be *sequential*, thus implying that the operation respects +the associativity of a scalarized reduction. That is, the reduction begins with +the start value and performs an fmul operation with consecutively increasing +vector element indices. 
See the following pseudocode: :: - float ordered_fmul(start_value, input_vector) + float sequential_fmul(start_value, input_vector) result = start_value for i = 0 to length(input_vector) result = result * input_vector[i] @@ -15690,23 +15690,23 @@ Examples: :: - %unord = call reassoc float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %input) ; unordered reduction - %ord = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %start_value, <4 x float> %input) ; ordered reduction + %unord = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.0, <4 x float> %input) ; relaxed reduction + %ord = call float @llvm.vector.reduce.fmul.v4f32(float %start_value, <4 x float> %input) ; sequential reduction -'``llvm.experimental.vector.reduce.and.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.and.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %a) + declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.and.*``' intrinsics do a bitwise ``AND`` +The '``llvm.vector.reduce.and.*``' intrinsics do a bitwise ``AND`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. @@ -15714,20 +15714,20 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of integer values. -'``llvm.experimental.vector.reduce.or.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.or.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a) + declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.or.*``' intrinsics do a bitwise ``OR`` reduction +The '``llvm.vector.reduce.or.*``' intrinsics do a bitwise ``OR`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. @@ -15735,20 +15735,20 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of integer values. -'``llvm.experimental.vector.reduce.xor.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.xor.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %a) + declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.xor.*``' intrinsics do a bitwise ``XOR`` +The '``llvm.vector.reduce.xor.*``' intrinsics do a bitwise ``XOR`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. @@ -15756,20 +15756,20 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of integer values. 
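These bitwise reductions also apply to ``i1`` vectors, which is how any-of/all-of style checks are expressed (see the updated vector-reduce-or-bool.ll and vector-reduce-and-bool.ll tests). A minimal sketch, assuming a placeholder mask value ``%m``::

      %any = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %m)  ; true if any lane is set
      %all = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %m) ; true only if every lane is set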
-'``llvm.experimental.vector.reduce.smax.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.smax.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %a) + declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.smax.*``' intrinsics do a signed integer +The '``llvm.vector.reduce.smax.*``' intrinsics do a signed integer ``MAX`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. @@ -15777,20 +15777,20 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of integer values. -'``llvm.experimental.vector.reduce.smin.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.smin.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %a) + declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.smin.*``' intrinsics do a signed integer +The '``llvm.vector.reduce.smin.*``' intrinsics do a signed integer ``MIN`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. @@ -15798,20 +15798,20 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of integer values. -'``llvm.experimental.vector.reduce.umax.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.umax.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %a) + declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.umax.*``' intrinsics do an unsigned +The '``llvm.vector.reduce.umax.*``' intrinsics do an unsigned integer ``MAX`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. @@ -15819,20 +15819,20 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of integer values. -'``llvm.experimental.vector.reduce.umin.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.umin.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %a) + declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.umin.*``' intrinsics do an unsigned +The '``llvm.vector.reduce.umin.*``' intrinsics do an unsigned integer ``MIN`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. @@ -15840,21 +15840,21 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of integer values. 
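A minimal sketch contrasting the signed and unsigned forms on the same bit pattern::

      %s = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> <i8 -1, i8 2>) ; yields 2, since signed -1 < 2
      %u = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> <i8 -1, i8 2>) ; yields -1, since unsigned 0xff > 2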
-'``llvm.experimental.vector.reduce.fmax.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.fmax.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a) - declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a) + declare float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) + declare double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.fmax.*``' intrinsics do a floating-point +The '``llvm.vector.reduce.fmax.*``' intrinsics do a floating-point ``MAX`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. @@ -15870,8 +15870,8 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of floating-point values. -'``llvm.experimental.vector.reduce.fmin.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.vector.reduce.fmin.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" This is an overloaded intrinsic. :: - declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a) - declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a) + declare float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) + declare double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a) Overview: """"""""" -The '``llvm.experimental.vector.reduce.fmin.*``' intrinsics do a floating-point +The '``llvm.vector.reduce.fmin.*``' intrinsics do a floating-point ``MIN`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 47ce9fa10d908..b81547dca6df5 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -63,6 +63,10 @@ Changes to the LLVM IR * Added the ``byref`` attribute to better represent argument passing for the `amdgpu_kernel` calling convention. +* The ``llvm.experimental.vector.reduce`` family of intrinsics has been renamed + to drop the "experimental" from the name, reflecting its now fully supported + status in the IR.
+ Changes to building LLVM ------------------------ diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 2a1ec0103d09f..f8a357a8479b1 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1180,19 +1180,19 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I); } - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_v2_fadd: - case Intrinsic::experimental_vector_reduce_v2_fmul: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: { + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: { IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I); return getIntrinsicInstrCost(Attrs, CostKind); } @@ -1407,46 +1407,46 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0, CostKind); } - case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::vector_reduce_add: return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::vector_reduce_mul: return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::vector_reduce_and: return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::vector_reduce_or: return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::vector_reduce_xor: return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::vector_reduce_fadd: // FIXME: Add new flag for cost of strict reductions. return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_v2_fmul: + case Intrinsic::vector_reduce_fmul: // FIXME: Add new flag for cost of strict reductions. 
return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: return thisT()->getMinMaxReductionCost( VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)), /*IsPairwiseForm=*/false, /*IsUnsigned=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: return thisT()->getMinMaxReductionCost( VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)), /*IsPairwiseForm=*/false, diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 20c6d3b8cb1c4..d05b9982c4874 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1452,34 +1452,35 @@ def int_memset_element_unordered_atomic //===------------------------ Reduction Intrinsics ------------------------===// // let IntrProperties = [IntrNoMem, IntrWillReturn] in { - def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, - llvm_anyvector_ty]>; - def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, - llvm_anyvector_ty]>; - def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; + + def int_vector_reduce_fadd : Intrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty]>; + def int_vector_reduce_fmul : Intrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty]>; + def int_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_smin : 
Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; } //===----- Matrix intrinsics ---------------------------------------------===// diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 0ddca5a09838d..9ae67d074c5c2 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1457,15 +1457,15 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::smul_fix_sat: case Intrinsic::bitreverse: case Intrinsic::is_constant: - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_umin: - case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_umax: // Target intrinsics case Intrinsic::arm_mve_vctp8: case Intrinsic::arm_mve_vctp16: @@ -1711,31 +1711,31 @@ Constant *ConstantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) { return nullptr; const APInt &X = CI->getValue(); switch (IID) { - case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::vector_reduce_add: Acc = Acc + X; break; - case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::vector_reduce_mul: Acc = Acc * X; break; - case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::vector_reduce_and: Acc = Acc & X; break; - case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::vector_reduce_or: Acc = Acc | X; break; - case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::vector_reduce_xor: Acc = Acc ^ X; break; - case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::vector_reduce_smin: Acc = APIntOps::smin(Acc, X); break; - case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::vector_reduce_smax: Acc = APIntOps::smax(Acc, X); break; - case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::vector_reduce_umin: Acc = APIntOps::umin(Acc, X); break; - case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::vector_reduce_umax: Acc = APIntOps::umax(Acc, X); break; } @@ -2240,15 +2240,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (isa(Operands[0])) { switch (IntrinsicID) { default: break; - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_umin: - 
case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_umax: return ConstantInt::get(Ty, 0); } } @@ -2259,15 +2259,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, auto *Op = cast(Operands[0]); switch (IntrinsicID) { default: break; - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_umin: - case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_umax: if (Constant *C = ConstantFoldVectorReduce(IntrinsicID, Op)) return C; break; diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp index dfaaafaf811f1..184eae51b2ce2 100644 --- a/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/llvm/lib/CodeGen/ExpandReductions.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This pass implements IR expansion for reduction intrinsics, allowing targets -// to enable the experimental intrinsics until just before codegen. +// to enable the intrinsics until just before codegen. 
// //===----------------------------------------------------------------------===// @@ -30,27 +30,27 @@ namespace { unsigned getOpcode(Intrinsic::ID ID) { switch (ID) { - case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::vector_reduce_fadd: return Instruction::FAdd; - case Intrinsic::experimental_vector_reduce_v2_fmul: + case Intrinsic::vector_reduce_fmul: return Instruction::FMul; - case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::vector_reduce_add: return Instruction::Add; - case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::vector_reduce_mul: return Instruction::Mul; - case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::vector_reduce_and: return Instruction::And; - case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::vector_reduce_or: return Instruction::Or; - case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::vector_reduce_xor: return Instruction::Xor; - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: return Instruction::ICmp; - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: return Instruction::FCmp; default: llvm_unreachable("Unexpected ID"); @@ -59,17 +59,17 @@ unsigned getOpcode(Intrinsic::ID ID) { RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { switch (ID) { - case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::vector_reduce_smax: return RecurrenceDescriptor::MRK_SIntMax; - case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::vector_reduce_smin: return RecurrenceDescriptor::MRK_SIntMin; - case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::vector_reduce_umax: return RecurrenceDescriptor::MRK_UIntMax; - case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::vector_reduce_umin: return RecurrenceDescriptor::MRK_UIntMin; - case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::vector_reduce_fmax: return RecurrenceDescriptor::MRK_FloatMax; - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_fmin: return RecurrenceDescriptor::MRK_FloatMin; default: return RecurrenceDescriptor::MRK_Invalid; @@ -83,19 +83,19 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { if (auto *II = dyn_cast(&I)) { switch (II->getIntrinsicID()) { default: break; - case Intrinsic::experimental_vector_reduce_v2_fadd: - case Intrinsic::experimental_vector_reduce_v2_fmul: - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: 
+ case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: if (TTI->shouldExpandReduction(II)) Worklist.push_back(II); @@ -116,8 +116,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { Builder.setFastMathFlags(FMF); switch (ID) { default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::experimental_vector_reduce_v2_fadd: - case Intrinsic::experimental_vector_reduce_v2_fmul: { + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: { // FMFs must be attached to the call, otherwise it's an ordered reduction // and it can't be handled by generating a shuffle sequence. Value *Acc = II->getArgOperand(0); @@ -135,15 +135,15 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { } break; } - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: { + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: { Value *Vec = II->getArgOperand(0); if (!isPowerOf2_32( cast(Vec->getType())->getNumElements())) @@ -152,8 +152,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); break; } - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: { + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: { // FIXME: We only expand 'fast' reductions here because the underlying // code in createMinMaxOp() assumes that comparisons use 'fast' // semantics. 
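For targets whose shouldExpandReduction() TTI hook returns true, the pass replaces a reduction call with a log2(N) tree of shuffles and binary operations. A sketch of the expansion for a <4 x i32> integer add reduction, in the spirit of the updated expand-experimental-reductions.ll test (%v is a placeholder input):

    %rdx.shuf = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
    %bin.rdx = add <4 x i32> %v, %rdx.shuf
    %rdx.shuf1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    %bin.rdx2 = add <4 x i32> %bin.rdx, %rdx.shuf1
    %r = extractelement <4 x i32> %bin.rdx2, i32 0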
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a166b790bca66..2f3601137f832 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6762,19 +6762,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, LowerDeoptimizeCall(&I); return; - case Intrinsic::experimental_vector_reduce_v2_fadd: - case Intrinsic::experimental_vector_reduce_v2_fmul: - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: visitVectorReduce(I, Intrinsic); return; @@ -8937,7 +8937,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, SDFlags.copyFMF(*FPMO); switch (Intrinsic) { - case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::vector_reduce_fadd: if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FADD, dl, VT, Op1, DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags), @@ -8945,7 +8945,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2, SDFlags); break; - case Intrinsic::experimental_vector_reduce_v2_fmul: + case Intrinsic::vector_reduce_fmul: if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags), @@ -8953,37 +8953,37 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2, SDFlags); break; - case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::vector_reduce_add: Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::vector_reduce_mul: Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::vector_reduce_and: Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::vector_reduce_or: Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::vector_reduce_xor: Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::vector_reduce_smax: Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::vector_reduce_smin: Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1); break; - 
case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::vector_reduce_umax: Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::vector_reduce_umin: Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::vector_reduce_fmax: Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); break; - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_fmin: Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); break; default: diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d27c1b4591496..f5b235a57861d 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsX86.h" @@ -717,18 +718,42 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } case 'e': { SmallVector Groups; - static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+"); + static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+"); if (R.match(Name, &Groups)) { + Intrinsic::ID ID; + ID = StringSwitch(Groups[1]) + .Case("add", Intrinsic::vector_reduce_add) + .Case("mul", Intrinsic::vector_reduce_mul) + .Case("and", Intrinsic::vector_reduce_and) + .Case("or", Intrinsic::vector_reduce_or) + .Case("xor", Intrinsic::vector_reduce_xor) + .Case("smax", Intrinsic::vector_reduce_smax) + .Case("smin", Intrinsic::vector_reduce_smin) + .Case("umax", Intrinsic::vector_reduce_umax) + .Case("umin", Intrinsic::vector_reduce_umin) + .Case("fmax", Intrinsic::vector_reduce_fmax) + .Case("fmin", Intrinsic::vector_reduce_fmin) + .Default(Intrinsic::not_intrinsic); + if (ID != Intrinsic::not_intrinsic) { + rename(F); + auto Args = F->getFunctionType()->params(); + NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]}); + return true; + } + } + static const Regex R2( + "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+"); + Groups.clear(); + if (R2.match(Name, &Groups)) { Intrinsic::ID ID = Intrinsic::not_intrinsic; if (Groups[1] == "fadd") - ID = Intrinsic::experimental_vector_reduce_v2_fadd; + ID = Intrinsic::vector_reduce_fadd; if (Groups[1] == "fmul") - ID = Intrinsic::experimental_vector_reduce_v2_fmul; - + ID = Intrinsic::vector_reduce_fmul; if (ID != Intrinsic::not_intrinsic) { rename(F); auto Args = F->getFunctionType()->params(); - Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]}; + Type *Tys[] = {Args[1]}; NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys); return true; } @@ -3620,28 +3645,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { DefaultCase(); return; } - case Intrinsic::experimental_vector_reduce_v2_fmul: { - SmallVector Args; - if (CI->isFast()) - Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0)); - else - Args.push_back(CI->getOperand(0)); - Args.push_back(CI->getOperand(1)); - NewCall = Builder.CreateCall(NewFn, Args); - cast(NewCall)->copyFastMathFlags(CI); - break; - } - case Intrinsic::experimental_vector_reduce_v2_fadd: { - SmallVector Args; - if (CI->isFast()) - Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType())); - else - Args.push_back(CI->getOperand(0)); - Args.push_back(CI->getOperand(1)); - NewCall = 
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index febfe189df6ea..3319f48b42d7f 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -325,61 +325,53 @@ static CallInst *getReductionIntrinsic(IRBuilderBase *Builder, Intrinsic::ID ID,
 CallInst *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) {
   Module *M = GetInsertBlock()->getParent()->getParent();
   Value *Ops[] = {Acc, Src};
-  Type *Tys[] = {Acc->getType(), Src->getType()};
-  auto Decl = Intrinsic::getDeclaration(
-      M, Intrinsic::experimental_vector_reduce_v2_fadd, Tys);
+  auto Decl = Intrinsic::getDeclaration(M, Intrinsic::vector_reduce_fadd,
+                                        {Src->getType()});
   return createCallHelper(Decl, Ops, this);
 }
 
 CallInst *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) {
   Module *M = GetInsertBlock()->getParent()->getParent();
   Value *Ops[] = {Acc, Src};
-  Type *Tys[] = {Acc->getType(), Src->getType()};
-  auto Decl = Intrinsic::getDeclaration(
-      M, Intrinsic::experimental_vector_reduce_v2_fmul, Tys);
+  auto Decl = Intrinsic::getDeclaration(M, Intrinsic::vector_reduce_fmul,
+                                        {Src->getType()});
   return createCallHelper(Decl, Ops, this);
 }
 
 CallInst *IRBuilderBase::CreateAddReduce(Value *Src) {
-  return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_add,
-                               Src);
+  return getReductionIntrinsic(this, Intrinsic::vector_reduce_add, Src);
 }
 
 CallInst *IRBuilderBase::CreateMulReduce(Value *Src) {
-  return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_mul,
-                               Src);
+  return getReductionIntrinsic(this, Intrinsic::vector_reduce_mul, Src);
 }
 
 CallInst *IRBuilderBase::CreateAndReduce(Value *Src) {
-  return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_and,
-                               Src);
+  return getReductionIntrinsic(this, Intrinsic::vector_reduce_and, Src);
 }
 
 CallInst *IRBuilderBase::CreateOrReduce(Value *Src) {
-  return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_or,
-                               Src);
+  return getReductionIntrinsic(this, Intrinsic::vector_reduce_or, Src);
 }
 
 CallInst *IRBuilderBase::CreateXorReduce(Value *Src) {
-  return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_xor,
-                               Src);
+  return getReductionIntrinsic(this, Intrinsic::vector_reduce_xor, Src);
 }
 
 CallInst *IRBuilderBase::CreateIntMaxReduce(Value *Src, bool IsSigned) {
-  auto ID = IsSigned ? Intrinsic::experimental_vector_reduce_smax
-                     : Intrinsic::experimental_vector_reduce_umax;
+  auto ID =
+      IsSigned ? Intrinsic::vector_reduce_smax : Intrinsic::vector_reduce_umax;
   return getReductionIntrinsic(this, ID, Src);
 }
 
 CallInst *IRBuilderBase::CreateIntMinReduce(Value *Src, bool IsSigned) {
-  auto ID = IsSigned ? Intrinsic::experimental_vector_reduce_smin
-                     : Intrinsic::experimental_vector_reduce_umin;
+  auto ID =
+      IsSigned ? Intrinsic::vector_reduce_smin : Intrinsic::vector_reduce_umin;
   return getReductionIntrinsic(this, ID, Src);
 }
 
 CallInst *IRBuilderBase::CreateFPMaxReduce(Value *Src, bool NoNaN) {
-  auto Rdx = getReductionIntrinsic(
-      this, Intrinsic::experimental_vector_reduce_fmax, Src);
+  auto Rdx = getReductionIntrinsic(this, Intrinsic::vector_reduce_fmax, Src);
   if (NoNaN) {
     FastMathFlags FMF;
     FMF.setNoNaNs();
@@ -389,8 +381,7 @@ CallInst *IRBuilderBase::CreateFPMaxReduce(Value *Src, bool NoNaN) {
 }
 
 CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src, bool NoNaN) {
-  auto Rdx = getReductionIntrinsic(
-      this, Intrinsic::experimental_vector_reduce_fmin, Src);
+  auto Rdx = getReductionIntrinsic(this, Intrinsic::vector_reduce_fmin, Src);
   if (NoNaN) {
     FastMathFlags FMF;
     FMF.setNoNaNs();
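As a sketch of the IRBuilder change (the values %acc and %v are hypothetical), CreateFAddReduce(Acc, Src) now emits a call mangled with only the vector type:

    %r = call float @llvm.vector.reduce.fadd.v4f32(float %acc, <4 x float> %v)

where it previously emitted @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32 with both overload types; the integer and min/max helpers change only in the intrinsic ID they pass to getReductionIntrinsic.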
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index be27849bc19f6..c585d13d11358 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -219,8 +219,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
   bool shouldExpandReduction(const IntrinsicInst *II) const {
     switch (II->getIntrinsicID()) {
-    case Intrinsic::experimental_vector_reduce_v2_fadd:
-    case Intrinsic::experimental_vector_reduce_v2_fmul:
+    case Intrinsic::vector_reduce_fadd:
+    case Intrinsic::vector_reduce_fmul:
       // We don't have legalization support for ordered FP reductions.
       return !II->getFastMathFlags().allowReassoc();
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 4189eba111e3d..689b484ad9767 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -195,8 +195,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
   bool shouldExpandReduction(const IntrinsicInst *II) const {
     switch (II->getIntrinsicID()) {
-    case Intrinsic::experimental_vector_reduce_v2_fadd:
-    case Intrinsic::experimental_vector_reduce_v2_fmul:
+    case Intrinsic::vector_reduce_fadd:
+    case Intrinsic::vector_reduce_fmul:
       // We don't have legalization support for ordered FP reductions.
       return !II->getFastMathFlags().allowReassoc();
    default:
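The two shouldExpandReduction hooks keep their existing behavior under the new names. As a hypothetical illustration, an ordered reduction such as

    %ord = call float @llvm.vector.reduce.fadd.v4f32(float %acc, <4 x float> %v)

carries no reassoc fast-math flag, so the hook returns true and the ExpandReductions pass lowers it to a scalar chain, while a reassoc-flagged call is left intact for instruction selection.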
diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index f01f485859b04..a721a16705829 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -270,7 +270,7 @@ bool MVETailPredication::IsPredicatedVectorLoop() {
       case Intrinsic::uadd_sat:
       case Intrinsic::ssub_sat:
       case Intrinsic::usub_sat:
-      case Intrinsic::experimental_vector_reduce_add:
+      case Intrinsic::vector_reduce_add:
         continue;
       case Intrinsic::fma:
       case Intrinsic::trunc:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 15bdbd01e71ca..00e981686e797 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1824,8 +1824,7 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
   }
 
   auto m_AddRdx = [](Value *&Vec) {
-    return m_OneUse(
-        m_Intrinsic<Intrinsic::experimental_vector_reduce_add>(m_Value(Vec)));
+    return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec)));
   };
   Value *V0, *V1;
   if (match(Op0, m_AddRdx(V0)) && match(Op1, m_AddRdx(V1)) &&
     // Difference of sums is sum of differences:
     // add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1)
     Value *Sub = Builder.CreateSub(V0, V1);
-    Value *Rdx = Builder.CreateIntrinsic(
-        Intrinsic::experimental_vector_reduce_add, {Sub->getType()}, {Sub});
+    Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_add,
+                                         {Sub->getType()}, {Sub});
     return replaceInstUsesWith(I, Rdx);
   }
 
@@ -2280,9 +2279,8 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
   }
 
   auto m_FaddRdx = [](Value *&Sum, Value *&Vec) {
-    return m_OneUse(
-        m_Intrinsic<Intrinsic::experimental_vector_reduce_v2_fadd>(
-            m_Value(Sum), m_Value(Vec)));
+    return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(m_Value(Sum),
+                                                               m_Value(Vec)));
   };
   Value *A0, *A1, *V0, *V1;
   if (match(Op0, m_FaddRdx(A0, V0)) && match(Op1, m_FaddRdx(A1, V1)) &&
     // Difference of sums is sum of differences:
     // add_rdx(A0, V0) - add_rdx(A1, V1) --> add_rdx(A0, V0 - V1) - A1
     Value *Sub = Builder.CreateFSubFMF(V0, V1, &I);
-    Value *Rdx = Builder.CreateIntrinsic(
-        Intrinsic::experimental_vector_reduce_v2_fadd,
-        {A0->getType(), Sub->getType()}, {A0, Sub}, &I);
+    Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_fadd,
+                                         {Sub->getType()}, {A0, Sub}, &I);
     return BinaryOperator::CreateFSubFMF(Rdx, A1, &I);
   }
 
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index df2144d206d5a..f5dde74244828 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2995,7 +2995,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOrigin(&I, getOrigin(&I, 0));
   }
 
-  // Instrument experimental.vector.reduce.or intrinsic.
+  // Instrument vector.reduce.or intrinsic.
   // Valid (non-poisoned) set bits in the operand pull low the
   // corresponding shadow bits.
   void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
@@ -3013,7 +3013,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOrigin(&I, getOrigin(&I, 0));
   }
 
-  // Instrument experimental.vector.reduce.or intrinsic.
+  // Instrument vector.reduce.and intrinsic.
   // Valid (non-poisoned) unset bits in the operand pull down the
   // corresponding shadow bits.
   void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
@@ -3264,15 +3264,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::masked_load:
       handleMaskedLoad(I);
       break;
-    case Intrinsic::experimental_vector_reduce_and:
+    case Intrinsic::vector_reduce_and:
       handleVectorReduceAndIntrinsic(I);
       break;
-    case Intrinsic::experimental_vector_reduce_or:
+    case Intrinsic::vector_reduce_or:
      handleVectorReduceOrIntrinsic(I);
      break;
-    case Intrinsic::experimental_vector_reduce_add:
-    case Intrinsic::experimental_vector_reduce_xor:
-    case Intrinsic::experimental_vector_reduce_mul:
+    case Intrinsic::vector_reduce_add:
+    case Intrinsic::vector_reduce_xor:
+    case Intrinsic::vector_reduce_mul:
       handleVectorReduceIntrinsic(I);
       break;
     case Intrinsic::x86_sse_stmxcsr:
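A sketch of the shadow rule the comments above describe (hypothetical snippet): for

    %r = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> %v)

a lane whose bit N is a valid (non-poisoned) 1 already determines bit N of the result, so the corresponding shadow bit can be cleared regardless of the other lanes; reduce.and is the dual case with valid 0 bits, and the generic add/xor/mul handler conservatively ORs the per-lane shadows together.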
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll index d42d740dabff5..3ddf90f12c2ef 100644 --- a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll @@ -2,278 +2,278 @@ ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE ; COST-LABEL: add.i8.v8i8 -; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v) ; CODE-LABEL: add.i8.v8i8 ; CODE: addv b0, v0.8b define i8 @add.i8.v8i8(<8 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %v) + %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v) ret i8 %r } ; COST-LABEL: add.i8.v16i8 -; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v) ; CODE-LABEL: add.i8.v16i8 ; CODE: addv b0, v0.16b define i8 @add.i8.v16i8(<16 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %v) + %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v) ret i8 %r } ; COST-LABEL: add.i16.v4i16 -; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v) ; CODE-LABEL: add.i16.v4i16 ; CODE: addv h0, v0.4h define i16 @add.i16.v4i16(<4 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %v) + %r = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v) ret i16 %r } ; COST-LABEL: add.i16.v8i16 -; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v) ; CODE-LABEL: add.i16.v8i16 ; CODE: addv h0, v0.8h define i16 @add.i16.v8i16(<8 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %v) + %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v) ret i16 %r } ; COST-LABEL: add.i32.v4i32 -; COST: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v) ; CODE-LABEL: add.i32.v4i32 ; CODE: addv s0, v0.4s define i32 @add.i32.v4i32(<4 x i32> %v) { - %r = call i32
@llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v) + %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v) ret i32 %r } ; COST-LABEL: umin.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v) ; CODE-LABEL: umin.i8.v8i8 ; CODE: uminv b0, v0.8b define i8 @umin.i8.v8i8(<8 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %v) + %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v) ret i8 %r } ; COST-LABEL: umin.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v) ; CODE-LABEL: umin.i8.v16i8 ; CODE: uminv b0, v0.16b define i8 @umin.i8.v16i8(<16 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %v) + %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v) ret i8 %r } ; COST-LABEL: umin.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v) ; CODE-LABEL: umin.i16.v4i16 ; CODE: uminv h0, v0.4h define i16 @umin.i16.v4i16(<4 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %v) + %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v) ret i16 %r } ; COST-LABEL: umin.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v) ; CODE-LABEL: umin.i16.v8i16 ; CODE: uminv h0, v0.8h define i16 @umin.i16.v8i16(<8 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %v) + %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v) ret i16 %r } ; COST-LABEL: umin.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v) ; CODE-LABEL: umin.i32.v4i32 ; CODE: uminv s0, v0.4s define i32 @umin.i32.v4i32(<4 x i32> %v) { - %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %v) + %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v) ret i32 %r } ; COST-LABEL: umax.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v) ; CODE-LABEL: umax.i8.v8i8 ; CODE: umaxv b0, v0.8b define i8 @umax.i8.v8i8(<8 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %v) + %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v) ret i8 %r } ; COST-LABEL: umax.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v) ; CODE-LABEL: umax.i8.v16i8 ; CODE: umaxv b0, v0.16b define i8 @umax.i8.v16i8(<16 x i8> %v) { - %r 
= call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %v) + %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v) ret i8 %r } ; COST-LABEL: umax.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v) ; CODE-LABEL: umax.i16.v4i16 ; CODE: umaxv h0, v0.4h define i16 @umax.i16.v4i16(<4 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %v) + %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v) ret i16 %r } ; COST-LABEL: umax.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v) ; CODE-LABEL: umax.i16.v8i16 ; CODE: umaxv h0, v0.8h define i16 @umax.i16.v8i16(<8 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %v) + %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v) ret i16 %r } ; COST-LABEL: umax.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v) ; CODE-LABEL: umax.i32.v4i32 ; CODE: umaxv s0, v0.4s define i32 @umax.i32.v4i32(<4 x i32> %v) { - %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %v) + %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v) ret i32 %r } ; COST-LABEL: smin.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v) ; CODE-LABEL: smin.i8.v8i8 ; CODE: sminv b0, v0.8b define i8 @smin.i8.v8i8(<8 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %v) + %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v) ret i8 %r } ; COST-LABEL: smin.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v) ; CODE-LABEL: smin.i8.v16i8 ; CODE: sminv b0, v0.16b define i8 @smin.i8.v16i8(<16 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %v) + %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v) ret i8 %r } ; COST-LABEL: smin.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v) ; CODE-LABEL: smin.i16.v4i16 ; CODE: sminv h0, v0.4h define i16 @smin.i16.v4i16(<4 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %v) + %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v) ret i16 %r } ; COST-LABEL: smin.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v) ; CODE-LABEL: smin.i16.v8i16 ; CODE: sminv h0, v0.8h define i16 
@smin.i16.v8i16(<8 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %v) + %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v) ret i16 %r } ; COST-LABEL: smin.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v) ; CODE-LABEL: smin.i32.v4i32 ; CODE: sminv s0, v0.4s define i32 @smin.i32.v4i32(<4 x i32> %v) { - %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %v) + %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v) ret i32 %r } ; COST-LABEL: smax.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v) ; CODE-LABEL: smax.i8.v8i8 ; CODE: smaxv b0, v0.8b define i8 @smax.i8.v8i8(<8 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %v) + %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v) ret i8 %r } ; COST-LABEL: smax.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v) ; CODE-LABEL: smax.i8.v16i8 ; CODE: smaxv b0, v0.16b define i8 @smax.i8.v16i8(<16 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %v) + %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v) ret i8 %r } ; COST-LABEL: smax.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v) ; CODE-LABEL: smax.i16.v4i16 ; CODE: smaxv h0, v0.4h define i16 @smax.i16.v4i16(<4 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %v) + %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v) ret i16 %r } ; COST-LABEL: smax.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v) ; CODE-LABEL: smax.i16.v8i16 ; CODE: smaxv h0, v0.8h define i16 @smax.i16.v8i16(<8 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %v) + %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v) ret i16 %r } ; COST-LABEL: smax.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v) ; CODE-LABEL: smax.i32.v4i32 ; CODE: smaxv s0, v0.4s define i32 @smax.i32.v4i32(<4 x i32> %v) { - %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %v) + %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v) ret i32 %r } ; COST-LABEL: fmin.f32.v4f32 -; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) ; 
CODE-LABEL: fmin.f32.v4f32 ; CODE: fminnmv s0, v0.4s define float @fmin.f32.v4f32(<4 x float> %v) { - %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %v) + %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) ret float %r } ; COST-LABEL: fmax.f32.v4f32 -; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) ; CODE-LABEL: fmax.f32.v4f32 ; CODE: fmaxnmv s0, v0.4s define float @fmax.f32.v4f32(<4 x float> %v) { - %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %v) + %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) ret float %r } -declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) - -declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) - -declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) - -declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) - -declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) - -declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) - -declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) +declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) + +declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) + +declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) + +declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>) +declare i8 
@llvm.vector.reduce.smin.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) + +declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) + +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) + +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll index a03f283cd1e44..d71af6d0b6cfa 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll @@ -8,155 +8,155 @@ define i32 @reduce_i64(i32 %arg) { ; V8M-RECIP-LABEL: 'reduce_i64' -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-RECIP-LABEL: 'reduce_i64' -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 
@llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'reduce_i64' -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'reduce_i64' -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'reduce_i64' -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'reduce_i64' -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) - %V4 = call i64 
@llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; V8M-RECIP-LABEL: 'reduce_i32' -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-RECIP-LABEL: 'reduce_i32' -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) 
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1070 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1070 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'reduce_i32' -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4120 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5658 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 11806 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36390 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4120 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5658 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 11806 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36390 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'reduce_i32' -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'reduce_i32' -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'reduce_i32' -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) + %V8 = 
call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll index 8b8ef521764f4..87123b45502f0 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll @@ -5,171 +5,171 @@ define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' -; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 
@llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 
@llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i32' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i32 
@llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x 
i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i16' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; NEON-NEXT: Cost Model: Found an 
estimated cost of 503 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 
@llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i8' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 395 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 395 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> 
undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>) +declare 
i64 @llvm.vector.reduce.smax.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll index a39f2ffaf648e..88790472ba384 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll @@ -5,171 +5,171 @@ define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' -; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x 
i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 282 for 
instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i32' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; NEON-NEXT: Cost Model: 
Found an estimated cost of 55 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> 
undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i16' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; MVE-NEXT: 
Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = 
call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i8' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 395 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 395 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) -; MVE-NEXT: 
Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.smin.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.smin.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.smin.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x 
i32>) +declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.smin.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll index bb3205ab33360..fbdc56e162979 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll @@ -5,171 +5,171 @@ define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' -; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) + %V1 = call i64 
@llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ret i32 undef
 }
 
 define i32 @reduce_i32(i32 %arg) {
 ; V8M-LABEL: 'reduce_i32'
-; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i32'
-; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i32'
-; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ret i32 undef
 }
 
 define i32 @reduce_i16(i32 %arg) {
 ; V8M-LABEL: 'reduce_i16'
-; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i16'
-; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i16'
-; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ret i32 undef
 }
 
 define i32 @reduce_i8(i32 %arg) {
 ; V8M-LABEL: 'reduce_i8'
-; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i8'
-; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 395 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 395 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i8'
-; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ret i32 undef
 }
 
-declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umax.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umax.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umax.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll
index a5e0e56e0465d..bb94dbe2a5b8c 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll
@@ -5,171 +5,171 @@ define i32 @reduce_i64(i32 %arg) {
 ; V8M-LABEL: 'reduce_i64'
-; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
 ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i64'
-; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
 ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i64'
-; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
 ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
 ret i32 undef
 }
 
 define i32 @reduce_i32(i32 %arg) {
 ; V8M-LABEL: 'reduce_i32'
-; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i32'
-; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i32'
-; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ret i32 undef
 }
 
 define i32 @reduce_i16(i32 %arg) {
 ; V8M-LABEL: 'reduce_i16'
-; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i16'
-; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i16'
-; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ret i32 undef
 }
 
 define i32 @reduce_i8(i32 %arg) {
 ; V8M-LABEL: 'reduce_i8'
-; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
-; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i8'
-; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 395 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
-; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 395 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i8'
-; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
-; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ret i32 undef
 }
 
-declare i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umin.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-add.ll b/llvm/test/Analysis/CostModel/X86/reduce-add.ll
index d7ca1536072bf..cb5d0fa7ea03d 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-add.ll
@@ -12,279 +12,279 @@ define i32 @reduce_i64(i32 %arg) {
 ; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'reduce_i64'
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
 ret i32 undef
 }
 
 define i32 @reduce_i32(i32 %arg) {
 ; SSE-LABEL: 'reduce_i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'reduce_i32'
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ret i32 undef
 }
 
 define i32 @reduce_i16(i32 %arg) {
 ; SSE-LABEL: 'reduce_i16'
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16
@llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) +; 
AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'reduce_i16' -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE-LABEL: 'reduce_i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; 
SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; 
AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found 
an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'reduce_i8' -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) - %V8 = call i8 
@llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.add.v64i8(<64 x 
i8>) +declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-and.ll b/llvm/test/Analysis/CostModel/X86/reduce-and.ll index 8bcf5da07bdae..37a2146d52b4a 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-and.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-and.ll @@ -10,258 +10,258 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 
@llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for 
instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) - 
%V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE-LABEL: 'reduce_i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 
@llvm.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE-LABEL: 'reduce_i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = 
call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX-NEXT: Cost Model: Found an 
estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-  %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
-  %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
-  %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
-  %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
-  %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
-  %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
-  %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
+  %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+  %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+  %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+  %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+  %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+  %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
+  %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
   ret i32 undef
 }
 
 define i32 @reduce_i1(i32 %arg) {
 ; SSE-LABEL: 'reduce_i1'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i1'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i1'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'reduce_i1'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i1'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'reduce_i1'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-  %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
-  %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
-  %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
-  %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
-  %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
-  %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
-  %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
-  %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
+  %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+  %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+  %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+  %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+  %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+  %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+  %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+  %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
   ret i32 undef
 }
 
-declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>)
 
-declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1>)
+declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>)
+declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
+declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
+declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
+declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>)
+declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>)
+declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>)
+declare i1 @llvm.vector.reduce.and.v128i1(<128 x i1>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fmax.ll b/llvm/test/Analysis/CostModel/X86/reduce-fmax.ll
index 41db9ed818e5d..4f803c4e107be 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-fmax.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-fmax.ll
@@ -11,83 +11,83 @@
 define i32 @reduce_f64(i32 %arg) {
 ; SSE-LABEL: 'reduce_f64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'reduce_f64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_f64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-  %V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
-  %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
-  %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
-  %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
-  %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
+  %V1 = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
+  %V2 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+  %V4 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+  %V8 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
+  %V16 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
   ret i32 undef
 }
 
 define i32 @reduce_f32(i32 %arg) {
 ; SSE-LABEL: 'reduce_f32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'reduce_f32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_f32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-  %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
-  %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
-  %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
-  %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
-  %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
-  %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
+  %V1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
+  %V2 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+  %V4 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+  %V8 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+  %V16 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
+  %V32 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
   ret i32 undef
 }
 
-declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>)
+declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>)
+declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>)
 
-declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float>)
+declare float @llvm.vector.reduce.fmax.v1f32(<1 x float>)
+declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
+declare float @llvm.vector.reduce.fmax.v32f32(<32 x float>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fmin.ll b/llvm/test/Analysis/CostModel/X86/reduce-fmin.ll
index a12cd8e60703e..70b0a44313d2f 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-fmin.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-fmin.ll
@@ -11,83 +11,83 @@
 define i32 @reduce_f64(i32 %arg) {
 ; SSE-LABEL: 'reduce_f64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'reduce_f64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_f64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-  %V1 = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> undef)
-  %V2 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> undef)
-  %V4 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> undef)
-  %V8 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> undef)
-  %V16 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> undef)
+  %V1 = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> undef)
+  %V2 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+  %V4 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+  %V8 = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> undef)
+  %V16 = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> undef)
   ret i32 undef
 }
 
 define i32 @reduce_f32(i32 %arg) {
 ; SSE-LABEL: 'reduce_f32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fmin.v1f32(<1 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'reduce_f32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fmin.v1f32(<1 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_f32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fmin.v1f32(<1 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-  %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
-  %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
-  %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
-  %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
-  %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
-  %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
+  %V1 = call float @llvm.vector.reduce.fmin.v1f32(<1 x float> undef)
+  %V2 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+  %V4 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+  %V8 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+  %V16 = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> undef)
+  %V32 = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> undef)
  ret i32 undef
 }
 
-declare double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>)
+declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
+declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)
 
-declare float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float>)
+declare float @llvm.vector.reduce.fmin.v1f32(<1 x float>)
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)
+declare float @llvm.vector.reduce.fmin.v32f32(<32 x float>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-mul.ll b/llvm/test/Analysis/CostModel/X86/reduce-mul.ll
index a4984f5bdcbd7..b79d262b80a68 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-mul.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-mul.ll
@@ -10,276 +10,276 @@
 define i32 @reduce_i64(i32 %arg) {
 ; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'reduce_i64'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i64'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'reduce_i64'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-  %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
-  %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
-  %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
-  %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
-  %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
+  %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
+  %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
+  %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
+  %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
+  %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
   ret i32 undef
 }
 
 define i32 @reduce_i32(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-  %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
-  %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
-  %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
-  %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
-  %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
+  %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
+  %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
+  %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
+  %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
+  %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
   ret i32 undef
 }
 
 define i32 @reduce_i16(i32 %arg) {
 ; SSE-LABEL: 
'reduce_i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an 
estimated cost of 13 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 
@llvm.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE-LABEL: 'reduce_i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 
@llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> 
undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> 
undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) - %V8 = call i8 
@llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.mul.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.mul.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.mul.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.mul.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.mul.v64i8(<64 x 
i8>) +declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-or.ll b/llvm/test/Analysis/CostModel/X86/reduce-or.ll index 97e0847a50a11..065a75c44abf7 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-or.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-or.ll @@ -10,258 +10,258 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) 
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef) ; SSE-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) + %V2 = call i32 
@llvm.vector.reduce.or.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE-LABEL: 'reduce_i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 
@llvm.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE-LABEL: 'reduce_i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 
@llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = 
call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef) ret i32 undef } define i32 @reduce_i1(i32 %arg) { ; SSE-LABEL: 'reduce_i1' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) 
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 
@llvm.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 
@llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = 
call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) - %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) - %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) - %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) - %V16 = call i1 
@llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) - %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) - %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) - %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) + %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) + %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef) + %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef) + %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef) + %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef) + %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef) + %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef) + %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>) -declare i1 
@llvm.experimental.vector.reduce.or.v1i1(<1 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1>) +declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>) +declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>) +declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>) +declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>) +declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1>) +declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>) +declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>) +declare i1 @llvm.vector.reduce.or.v128i1(<128 x i1>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll index 19070d8721fa3..cbbad17b137a1 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll @@ -11,322 +11,322 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) 
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'reduce_i64' -; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 
@llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'reduce_i32' -; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'reduce_i16' -; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; SSE4-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 
for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> 
undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an 
estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for 
instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'reduce_i8' -; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 
@llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 
6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: 
Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 
x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.smax.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll index 07483dec5b8a4..0901d09e0360c 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll @@ -11,322 +11,322 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 
@llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'reduce_i64' -; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 
@llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; 
AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) - %V16 = call i64 
@llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; 
SSE4-LABEL: 'reduce_i32' -; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> 
undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 
@llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret i32 undef ; ; SSE4-LABEL: 'reduce_i16' -; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 
@llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = 
call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; 
AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ret i32 undef
 }

 define i32 @reduce_i8(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'reduce_i8'
-; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ret i32 undef
 }

-declare i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smin.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smin.v16i64(<16 x i64>)

-declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>)

-declare i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smin.v64i16(<64 x i16>)

-declare i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
index b8787b7b7e790..d2a81fe253780 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
@@ -11,322 +11,322 @@
 define i32 @reduce_i64(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'reduce_i64'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ret i32 undef
 }

 define i32 @reduce_i32(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'reduce_i32'
-; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ret i32 undef
 }

 define i32 @reduce_i16(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'reduce_i16'
-; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ret i32 undef
 }

 define i32 @reduce_i8(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'reduce_i8'
-; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
-; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for
instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>) -declare i64 
@llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.umax.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.umax.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.umax.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.umax.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.umax.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll index 5a8b49b6c6338..4dbdf71eadc0f 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll @@ -11,322 +11,322 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) 
-; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'reduce_i64' -; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) -; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; SSE41-NEXT: Cost 
Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) + %V1 = call i64 
@llvm.vector.reduce.umin.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'reduce_i32' -; SSE4-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 
= call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an 
estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'reduce_i16' -; SSE4-NEXT: Cost Model: Found an estimated cost 
of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated 
cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 
@llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 
@llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'reduce_i8' -; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) -; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 
@llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ret i32 undef
 }

-declare i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umin.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>)

-declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>)

-declare i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>)

-declare i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
index f1e586bd2e9af..15d3e968d4dfe 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
@@ -10,280 +10,280 @@ define i32 @reduce_i64(i32 %arg) {
 ; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'reduce_i64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
 ret i32 undef
 }

 define i32 @reduce_i32(i32 %arg) {
 ; SSE-LABEL: 'reduce_i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'reduce_i32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ret i32 undef
 }

 define i32 @reduce_i16(i32 %arg) {
 ; SSE-LABEL: 'reduce_i16'
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'reduce_i16'
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ret i32 undef
 }

 define i32 @reduce_i8(i32 %arg) {
 ; SSE-LABEL: 'reduce_i8'
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'reduce_i8'
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'reduce_i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
 ret i32 undef
 }

 define i32 @reduce_i1(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i1'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'reduce_i1'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'reduce_i1'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'reduce_i1'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'reduce_i1'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i1'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'reduce_i1'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
- %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
- %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
- %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
- %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
- %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
- %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
- %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
- %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
+ %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+ %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+ %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+ %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+ %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+ %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+ %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+ %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ret i32 undef
 }

-declare i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>)

-declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.xor.v32i32(<32 x i32>)

-declare i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.xor.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.xor.v64i16(<64 x i16>)

-declare i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.xor.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.xor.v128i8(<128 x i8>)

-declare i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1>)
+declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>)
+declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
+declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
+declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)
+declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>)
+declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>)
+declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>)
+declare i1 @llvm.vector.reduce.xor.v128i1(<128 x i1>)
diff --git a/llvm/test/Assembler/invalid-vecreduce.ll b/llvm/test/Assembler/invalid-vecreduce.ll
index 301b0a8938ef8..4806ea60883fb 100644
--- a/llvm/test/Assembler/invalid-vecreduce.ll
+++ b/llvm/test/Assembler/invalid-vecreduce.ll
@@ -1,34 +1,34 @@
 ; RUN: not opt -S < %s 2>&1 | FileCheck %s

-; CHECK: Intrinsic has incorrect argument type!
-; CHECK-NEXT: float (double, <2 x double>)* @llvm.experimental.vector.reduce.v2.fadd.f32.f64.v2f64 +; CHECK: Intrinsic has incorrect return type! +; CHECK-NEXT: float (double, <2 x double>)* @llvm.vector.reduce.fadd.f32.f64.v2f64 define float @fadd_invalid_scalar_res(double %acc, <2 x double> %in) { - %res = call float @llvm.experimental.vector.reduce.v2.fadd.f32.f64.v2f64(double %acc, <2 x double> %in) + %res = call float @llvm.vector.reduce.fadd.f32.f64.v2f64(double %acc, <2 x double> %in) ret float %res } ; CHECK: Intrinsic has incorrect argument type! -; CHECK-NEXT: double (float, <2 x double>)* @llvm.experimental.vector.reduce.v2.fadd.f64.f32.v2f64 +; CHECK-NEXT: double (float, <2 x double>)* @llvm.vector.reduce.fadd.f64.f32.v2f64 define double @fadd_invalid_scalar_start(float %acc, <2 x double> %in) { - %res = call double @llvm.experimental.vector.reduce.v2.fadd.f64.f32.v2f64(float %acc, <2 x double> %in) + %res = call double @llvm.vector.reduce.fadd.f64.f32.v2f64(float %acc, <2 x double> %in) ret double %res } -; CHECK: Intrinsic has incorrect argument type! -; CHECK-NEXT: <2 x double> (double, <2 x double>)* @llvm.experimental.vector.reduce.v2.fadd.v2f64.f64.v2f64 +; CHECK: Intrinsic has incorrect return type! +; CHECK-NEXT: <2 x double> (double, <2 x double>)* @llvm.vector.reduce.fadd.v2f64.f64.v2f64 define <2 x double> @fadd_invalid_vector_res(double %acc, <2 x double> %in) { - %res = call <2 x double> @llvm.experimental.vector.reduce.v2.fadd.v2f64.f64.v2f64(double %acc, <2 x double> %in) + %res = call <2 x double> @llvm.vector.reduce.fadd.v2f64.f64.v2f64(double %acc, <2 x double> %in) ret <2 x double> %res } ; CHECK: Intrinsic has incorrect argument type! -; CHECK-NEXT: double (<2 x double>, <2 x double>)* @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64.v2f64 +; CHECK-NEXT: double (<2 x double>, <2 x double>)* @llvm.vector.reduce.fadd.f64.v2f64.v2f64 define double @fadd_invalid_vector_start(<2 x double> %in, <2 x double> %acc) { - %res = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64.v2f64(<2 x double> %acc, <2 x double> %in) + %res = call double @llvm.vector.reduce.fadd.f64.v2f64.v2f64(<2 x double> %acc, <2 x double> %in) ret double %res } -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.f64.v2f64(double %acc, <2 x double> %in) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.f32.v2f64(float %acc, <2 x double> %in) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64.v2f64(<2 x double> %acc, <2 x double> %in) -declare <2 x double> @llvm.experimental.vector.reduce.v2.fadd.v2f64.f64.v2f64(double %acc, <2 x double> %in) +declare float @llvm.vector.reduce.fadd.f32.f64.v2f64(double %acc, <2 x double> %in) +declare double @llvm.vector.reduce.fadd.f64.f32.v2f64(float %acc, <2 x double> %in) +declare double @llvm.vector.reduce.fadd.f64.v2f64.v2f64(<2 x double> %acc, <2 x double> %in) +declare <2 x double> @llvm.vector.reduce.fadd.v2f64.f64.v2f64(double %acc, <2 x double> %in) diff --git a/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll b/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll index c91c7bc3953af..124d589565141 100644 --- a/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll +++ b/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll @@ -1,64 +1,130 @@ ; RUN: opt -S < %s | FileCheck %s ; RUN: llvm-dis < %s.bc | FileCheck %s -define float @fadd_acc(<4 x float> %in, float %acc) { -; CHECK-LABEL: @fadd_acc -; CHECK: %res = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %acc, <4 x float> %in) - %res = call 
float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %acc, <4 x float> %in) + +define float @fadd_v2(<4 x float> %in, float %acc) { +; CHECK-LABEL: @fadd_v2 +; CHECK: %res = call float @llvm.vector.reduce.fadd.v4f32(float %acc, <4 x float> %in) + %res = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %acc, <4 x float> %in) ret float %res } -define float @fadd_undef(<4 x float> %in) { -; CHECK-LABEL: @fadd_undef -; CHECK: %res = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float undef, <4 x float> %in) - %res = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %in) +define float @fadd_v2_fast(<4 x float> %in, float %acc) { +; CHECK-LABEL: @fadd_v2_fast +; CHECK: %res = call fast float @llvm.vector.reduce.fadd.v4f32(float %acc, <4 x float> %in) + %res = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %acc, <4 x float> %in) ret float %res } -define float @fadd_fast_acc(<4 x float> %in, float %acc) { -; CHECK-LABEL: @fadd_fast_acc -; CHECK: %res = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %in) - %res = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %acc, <4 x float> %in) +define float @fmul_v2(<4 x float> %in, float %acc) { +; CHECK-LABEL: @fmul_v2 +; CHECK: %res = call float @llvm.vector.reduce.fmul.v4f32(float %acc, <4 x float> %in) + %res = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %acc, <4 x float> %in) ret float %res } -define float @fadd_fast_undef(<4 x float> %in) { -; CHECK-LABEL: @fadd_fast_undef -; CHECK: %res = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %in) - %res = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %in) +define float @fmul_v2_fast(<4 x float> %in, float %acc) { +; CHECK-LABEL: @fmul_v2_fast +; CHECK: %res = call fast float @llvm.vector.reduce.fmul.v4f32(float %acc, <4 x float> %in) + %res = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %acc, <4 x float> %in) ret float %res } -define float @fmul_acc(<4 x float> %in, float %acc) { -; CHECK-LABEL: @fmul_acc -; CHECK: %res = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %acc, <4 x float> %in) - %res = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %acc, <4 x float> %in) +define float @fmin(<4 x float> %in) { +; CHECK-LABEL: @fmin +; CHECK: %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %in) + %res = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %in) ret float %res } -define float @fmul_undef(<4 x float> %in) { -; CHECK-LABEL: @fmul_undef -; CHECK: %res = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float undef, <4 x float> %in) - %res = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %in) +define float @fmax(<4 x float> %in) { +; CHECK-LABEL: @fmax +; CHECK: %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %in) + %res = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %in) ret float %res } -define float @fmul_fast_acc(<4 x float> %in, float %acc) { -; CHECK-LABEL: @fmul_fast_acc -; CHECK: %res = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> %in) - %res = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %acc, <4 x float> %in) - ret float %res +define i32 @and(<4 x 
i32> %in) { +; CHECK-LABEL: @and +; CHECK: %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %in) + %res = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %in) + ret i32 %res } -define float @fmul_fast_undef(<4 x float> %in) { -; CHECK-LABEL: @fmul_fast_undef -; CHECK: %res = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> %in) - %res = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %in) - ret float %res +define i32 @or(<4 x i32> %in) { +; CHECK-LABEL: @or +; CHECK: %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %in) + %res = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %in) + ret i32 %res +} + +define i32 @xor(<4 x i32> %in) { +; CHECK-LABEL: @xor +; CHECK: %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %in) + %res = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %in) + ret i32 %res +} + +define i32 @smin(<4 x i32> %in) { +; CHECK-LABEL: @smin +; CHECK: %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %in) + %res = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %in) + ret i32 %res } -declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>) -; CHECK: declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) +define i32 @smax(<4 x i32> %in) { +; CHECK-LABEL: @smax +; CHECK: %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %in) + %res = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %in) + ret i32 %res +} + +define i32 @umin(<4 x i32> %in) { +; CHECK-LABEL: @umin +; CHECK: %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %in) + %res = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %in) + ret i32 %res +} + +define i32 @umax(<4 x i32> %in) { +; CHECK-LABEL: @umax +; CHECK: %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %in) + %res = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %in) + ret i32 %res +} + + +declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>) + +declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) +; CHECK: declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) + +declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) +; CHECK: declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) + +declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>) +; CHECK: declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>) + +declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>) +; CHECK: declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>) + +declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>) +; CHECK: declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>) + +declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) +; CHECK: declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) + +declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) +; CHECK: declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) + +declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) +; CHECK: declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) + +declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) +; CHECK: declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) + + + + -declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>) -; CHECK: declare float 
@llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
diff --git a/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll.bc b/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll.bc index 18f44f7acd981395e030968a586d555a16721196..2c9af9a521b7393981333c5e5f9321ddff35bf7f 100644
GIT binary patch literal 2800 [base85-encoded binary data omitted] delta 987 [base85-encoded binary data omitted]
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
-declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) -declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) -declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) define i8 @add_B(<16 x i8>* %arr) { ; CHECK-LABEL: add_B ; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b %bin.rdx = load <16 x i8>, <16 x i8>* %arr - %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %bin.rdx) + %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %bin.rdx) ret i8 %r } @@ -18,7 +18,7 @@ define i16 @add_H(<8 x i16>* %arr) { ; CHECK-LABEL: add_H ; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h %bin.rdx = load <8 x i16>, <8 x i16>* %arr - %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %bin.rdx) + %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %bin.rdx) ret i16 %r } @@ -26,7 +26,7 @@ define i32 @add_S( <4 x i32>* %arr) { ; CHECK-LABEL: add_S ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s %bin.rdx = load <4 x i32>, <4 x i32>* %arr - %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %bin.rdx) + %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %bin.rdx) ret i32 %r } @@ -35,11 +35,11 @@ define i64 @add_D(<2 x i64>* %arr) { ; CHECK-NOT: addv ; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d %bin.rdx
= load <2 x i64>, <2 x i64>* %arr - %r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %bin.rdx) + %r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %bin.rdx) ret i64 %r } -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) { ; CHECK-LABEL: oversized_ADDV_256 @@ -55,16 +55,16 @@ entry: %7 = icmp slt <8 x i32> %6, zeroinitializer %8 = sub nsw <8 x i32> zeroinitializer, %6 %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6 - %r = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %9) + %r = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %9) ret i32 %r } -declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) define i32 @oversized_ADDV_512(<16 x i32>* %arr) { ; CHECK-LABEL: oversized_ADDV_512 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s %bin.rdx = load <16 x i32>, <16 x i32>* %arr - %r = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %bin.rdx) + %r = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %bin.rdx) ret i32 %r } diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll index b6f95a7f3bf0e..4ff80296c0d99 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll @@ -2,28 +2,28 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" -declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) -declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) - -declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) -declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) - -declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) +declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) +declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) + +declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) +declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) + +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) ; CHECK-LABEL: smax_B ; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b define i8 @smax_B(<16 x i8>* nocapture readonly %arr) { %arr.load = load <16 x i8>, <16 x i8>* %arr - %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %arr.load) + %r = call i8 
@llvm.vector.reduce.smax.v16i8(<16 x i8> %arr.load) ret i8 %r } @@ -31,7 +31,7 @@ define i8 @smax_B(<16 x i8>* nocapture readonly %arr) { ; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h define i16 @smax_H(<8 x i16>* nocapture readonly %arr) { %arr.load = load <8 x i16>, <8 x i16>* %arr - %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %arr.load) + %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %arr.load) ret i16 %r } @@ -39,7 +39,7 @@ define i16 @smax_H(<8 x i16>* nocapture readonly %arr) { ; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s define i32 @smax_S(<4 x i32> * nocapture readonly %arr) { %arr.load = load <4 x i32>, <4 x i32>* %arr - %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %arr.load) + %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %arr.load) ret i32 %r } @@ -47,7 +47,7 @@ define i32 @smax_S(<4 x i32> * nocapture readonly %arr) { ; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b define i8 @umax_B(<16 x i8>* nocapture readonly %arr) { %arr.load = load <16 x i8>, <16 x i8>* %arr - %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %arr.load) + %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %arr.load) ret i8 %r } @@ -55,7 +55,7 @@ define i8 @umax_B(<16 x i8>* nocapture readonly %arr) { ; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h define i16 @umax_H(<8 x i16>* nocapture readonly %arr) { %arr.load = load <8 x i16>, <8 x i16>* %arr - %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %arr.load) + %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %arr.load) ret i16 %r } @@ -63,7 +63,7 @@ define i16 @umax_H(<8 x i16>* nocapture readonly %arr) { ; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s define i32 @umax_S(<4 x i32>* nocapture readonly %arr) { %arr.load = load <4 x i32>, <4 x i32>* %arr - %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %arr.load) + %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %arr.load) ret i32 %r } @@ -71,7 +71,7 @@ define i32 @umax_S(<4 x i32>* nocapture readonly %arr) { ; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b define i8 @smin_B(<16 x i8>* nocapture readonly %arr) { %arr.load = load <16 x i8>, <16 x i8>* %arr - %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %arr.load) + %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %arr.load) ret i8 %r } @@ -79,7 +79,7 @@ define i8 @smin_B(<16 x i8>* nocapture readonly %arr) { ; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h define i16 @smin_H(<8 x i16>* nocapture readonly %arr) { %arr.load = load <8 x i16>, <8 x i16>* %arr - %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %arr.load) + %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %arr.load) ret i16 %r } @@ -87,7 +87,7 @@ define i16 @smin_H(<8 x i16>* nocapture readonly %arr) { ; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s define i32 @smin_S(<4 x i32>* nocapture readonly %arr) { %arr.load = load <4 x i32>, <4 x i32>* %arr - %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %arr.load) + %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %arr.load) ret i32 %r } @@ -95,7 +95,7 @@ define i32 @smin_S(<4 x i32>* nocapture readonly %arr) { ; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b define i8 @umin_B(<16 x i8>* nocapture readonly %arr) { %arr.load = load <16 x i8>, <16 x i8>* %arr - %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %arr.load) + %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %arr.load) ret i8 %r } @@ -103,7 +103,7 @@ define i8 @umin_B(<16 x i8>* nocapture readonly 
%arr) { ; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h define i16 @umin_H(<8 x i16>* nocapture readonly %arr) { %arr.load = load <8 x i16>, <8 x i16>* %arr - %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %arr.load) + %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %arr.load) ret i16 %r } @@ -111,7 +111,7 @@ define i16 @umin_H(<8 x i16>* nocapture readonly %arr) { ; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s define i32 @umin_S(<4 x i32>* nocapture readonly %arr) { %arr.load = load <4 x i32>, <4 x i32>* %arr - %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %arr.load) + %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %arr.load) ret i32 %r } @@ -119,7 +119,7 @@ define i32 @umin_S(<4 x i32>* nocapture readonly %arr) { ; CHECK: fmaxnmv define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) { %arr.load = load <4 x float>, <4 x float>* %arr - %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %arr.load) + %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arr.load) ret float %r } @@ -127,22 +127,22 @@ define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) { ; CHECK: fminnmv define float @fminnm_S(<4 x float>* nocapture readonly %arr) { %arr.load = load <4 x float>, <4 x float>* %arr - %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %arr.load) + %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arr.load) ret float %r } -declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>) define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_umax_256 ; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: umaxv {{h[0-9]+}}, [[V0]] %arr.load = load <16 x i16>, <16 x i16>* %arr - %r = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> %arr.load) + %r = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %arr.load) ret i16 %r } -declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>) define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_umax_512 @@ -151,22 +151,22 @@ define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]] %arr.load = load <16 x i32>, <16 x i32>* %arr - %r = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %arr.load) + %r = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %arr.load) ret i32 %r } -declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>) define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_umin_256 ; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: uminv {{h[0-9]+}}, [[V0]] %arr.load = load <16 x i16>, <16 x i16>* %arr - %r = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> %arr.load) + %r = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %arr.load) ret i16 %r } -declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>) define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_umin_512 @@ -175,22 +175,22 @@ define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-NEXT: umin 
[[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]] %arr.load = load <16 x i32>, <16 x i32>* %arr - %r = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %arr.load) + %r = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %arr.load) ret i32 %r } -declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>) define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_smax_256 ; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: smaxv {{h[0-9]+}}, [[V0]] %arr.load = load <16 x i16>, <16 x i16>* %arr - %r = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> %arr.load) + %r = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %arr.load) ret i16 %r } -declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>) define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_smax_512 @@ -199,22 +199,22 @@ define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]] %arr.load = load <16 x i32>, <16 x i32>* %arr - %r = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %arr.load) + %r = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %arr.load) ret i32 %r } -declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>) define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_smin_256 ; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: sminv {{h[0-9]+}}, [[V0]] %arr.load = load <16 x i16>, <16 x i16>* %arr - %r = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> %arr.load) + %r = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %arr.load) ret i16 %r } -declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>) define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_smin_512 @@ -223,6 +223,6 @@ define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]] %arr.load = load <16 x i32>, <16 x i32>* %arr - %r = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %arr.load) + %r = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %arr.load) ret i32 %r } diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll index 419cafc23186a..23eddd4e174e2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -141,7 +141,7 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { ret <2 x i64> %tmp4 } -declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) define i16 @uabdl8h_rdx(<16 x i8>* %a, <16 x i8>* %b) { ; CHECK-LABEL: uabdl8h_rdx @@ -155,11 +155,11 @@ define i16 @uabdl8h_rdx(<16 x i8>* %a, <16 x i8>* %b) { %abcmp = icmp slt <16 x i16> %abdiff, zeroinitializer %ababs = sub nsw <16 x i16> zeroinitializer, %abdiff %absel = select <16 x i1> %abcmp, <16 x i16> %ababs, <16 x i16> %abdiff - %reduced_v = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x 
i16> %absel) + %reduced_v = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %absel) ret i16 %reduced_v } -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) define i32 @uabdl4s_rdx(<8 x i16>* %a, <8 x i16>* %b) { ; CHECK-LABEL: uabdl4s_rdx @@ -173,11 +173,11 @@ define i32 @uabdl4s_rdx(<8 x i16>* %a, <8 x i16>* %b) { %abcmp = icmp slt <8 x i32> %abdiff, zeroinitializer %ababs = sub nsw <8 x i32> zeroinitializer, %abdiff %absel = select <8 x i1> %abcmp, <8 x i32> %ababs, <8 x i32> %abdiff - %reduced_v = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %absel) + %reduced_v = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %absel) ret i32 %reduced_v } -declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) define i64 @uabdl2d_rdx(<4 x i32>* %a, <4 x i32>* %b, i32 %h) { ; CHECK: uabdl2d_rdx @@ -191,7 +191,7 @@ define i64 @uabdl2d_rdx(<4 x i32>* %a, <4 x i32>* %b, i32 %h) { %abcmp = icmp slt <4 x i64> %abdiff, zeroinitializer %ababs = sub nsw <4 x i64> zeroinitializer, %abdiff %absel = select <4 x i1> %abcmp, <4 x i64> %ababs, <4 x i64> %abdiff - %reduced_v = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %absel) + %reduced_v = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %absel) ret i64 %reduced_v } diff --git a/llvm/test/CodeGen/AArch64/neon-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-dot-product.ll index eef89ab6ff391..67435546f9717 100644 --- a/llvm/test/CodeGen/AArch64/neon-dot-product.ll +++ b/llvm/test/CodeGen/AArch64/neon-dot-product.ll @@ -205,7 +205,7 @@ entry: ret void } -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) define i32 @test_udot_v8i8(i8* nocapture readonly %a, i8* nocapture readonly %b) { entry: @@ -218,7 +218,7 @@ entry: %4 = load <8 x i8>, <8 x i8>* %3 %5 = zext <8 x i8> %4 to <8 x i32> %6 = mul nuw nsw <8 x i32> %5, %2 - %7 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %6) + %7 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %6) ret i32 %7 } @@ -233,11 +233,11 @@ entry: %4 = load <8 x i8>, <8 x i8>* %3 %5 = sext <8 x i8> %4 to <8 x i32> %6 = mul nsw <8 x i32> %5, %2 - %7 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %6) + %7 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %6) ret i32 %7 } -declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) define i32 @test_udot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) { entry: @@ -250,7 +250,7 @@ entry: %4 = load <16 x i8>, <16 x i8>* %3 %5 = zext <16 x i8> %4 to <16 x i32> %6 = mul nuw nsw <16 x i32> %5, %2 - %7 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %6) + %7 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %6) %op.extra = add i32 %7, %sum ret i32 %op.extra } @@ -265,7 +265,7 @@ entry: %0 = bitcast i8* %a1 to <16 x i8>* %1 = load <16 x i8>, <16 x i8>* %0 %2 = zext <16 x i8> %1 to <16 x i32> - %3 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %2) + %3 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %2) ret i32 %3 } @@ -280,7 +280,7 @@ entry: %4 = load <16 x i8>, <16 x i8>* %3 %5 = sext <16 x i8> %4 to <16 x i32> %6 = mul nsw <16 x i32> %5, %2 - %7 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %6) + %7 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %6) %op.extra = add nsw i32 
%7, %sum ret i32 %op.extra } @@ -295,6 +295,6 @@ entry: %0 = bitcast i8* %a1 to <16 x i8>* %1 = load <16 x i8>, <16 x i8>* %0 %2 = sext <16 x i8> %1 to <16 x i32> - %3 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %2) + %3 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %2) ret i32 %3 } diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll index 68501a797178a..10aca253e4e6a 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll @@ -29,7 +29,7 @@ define half @fmaxv_v4f16(<4 x half> %a) #0 { ; CHECK-LABEL: fmaxv_v4f16: ; CHECK: fmaxnmv h0, v0.4h ; CHECK-NEXT: ret - %res = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %a) + %res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) ret half %res } @@ -38,7 +38,7 @@ define half @fmaxv_v8f16(<8 x half> %a) #0 { ; CHECK-LABEL: fmaxv_v8f16: ; CHECK: fmaxnmv h0, v0.8h ; CHECK-NEXT: ret - %res = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %a) + %res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a) ret half %res } @@ -49,7 +49,7 @@ define half @fmaxv_v16f16(<16 x half>* %a) #0 { ; VBITS_GE_256-NEXT: fmaxnmv h0, [[PG]], [[OP]].h ; VBITS_GE_256-NEXT: ret %op = load <16 x half>, <16 x half>* %a - %res = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %op) + %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op) ret half %res } @@ -60,7 +60,7 @@ define half @fmaxv_v32f16(<32 x half>* %a) #0 { ; VBITS_GE_512-NEXT: fmaxnmv h0, [[PG]], [[OP]].h ; VBITS_GE_512-NEXT: ret %op = load <32 x half>, <32 x half>* %a - %res = call half @llvm.experimental.vector.reduce.fmax.v32f16(<32 x half> %op) + %res = call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %op) ret half %res } @@ -71,7 +71,7 @@ define half @fmaxv_v64f16(<64 x half>* %a) #0 { ; VBITS_GE_1024-NEXT: fmaxnmv h0, [[PG]], [[OP]].h ; VBITS_GE_1024-NEXT: ret %op = load <64 x half>, <64 x half>* %a - %res = call half @llvm.experimental.vector.reduce.fmax.v64f16(<64 x half> %op) + %res = call half @llvm.vector.reduce.fmax.v64f16(<64 x half> %op) ret half %res } @@ -82,7 +82,7 @@ define half @fmaxv_v128f16(<128 x half>* %a) #0 { ; VBITS_GE_2048-NEXT: fmaxnmv h0, [[PG]], [[OP]].h ; VBITS_GE_2048-NEXT: ret %op = load <128 x half>, <128 x half>* %a - %res = call half @llvm.experimental.vector.reduce.fmax.v128f16(<128 x half> %op) + %res = call half @llvm.vector.reduce.fmax.v128f16(<128 x half> %op) ret half %res } @@ -91,7 +91,7 @@ define float @fmaxv_v2f32(<2 x float> %a) #0 { ; CHECK-LABEL: fmaxv_v2f32: ; CHECK: fmaxnmp s0, v0.2s ; CHECK: ret - %res = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a) + %res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a) ret float %res } @@ -100,7 +100,7 @@ define float @fmaxv_v4f32(<4 x float> %a) #0 { ; CHECK-LABEL: fmaxv_v4f32: ; CHECK: fmaxnmv s0, v0.4s ; CHECK: ret - %res = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a) + %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) ret float %res } @@ -111,7 +111,7 @@ define float @fmaxv_v8f32(<8 x float>* %a) #0 { ; VBITS_GE_256-NEXT: fmaxnmv s0, [[PG]], [[OP]].s ; VBITS_GE_256-NEXT: ret %op = load <8 x float>, <8 x float>* %a - %res = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %op) + %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op) ret float %res } @@ -122,7 +122,7 
@@ define float @fmaxv_v16f32(<16 x float>* %a) #0 { ; VBITS_GE_512-NEXT: fmaxnmv s0, [[PG]], [[OP]].s ; VBITS_GE_512-NEXT: ret %op = load <16 x float>, <16 x float>* %a - %res = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %op) + %res = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %op) ret float %res } @@ -133,7 +133,7 @@ define float @fmaxv_v32f32(<32 x float>* %a) #0 { ; VBITS_GE_1024-NEXT: fmaxnmv s0, [[PG]], [[OP]].s ; VBITS_GE_1024-NEXT: ret %op = load <32 x float>, <32 x float>* %a - %res = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> %op) + %res = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> %op) ret float %res } @@ -144,7 +144,7 @@ define float @fmaxv_v64f32(<64 x float>* %a) #0 { ; VBITS_GE_2048-NEXT: fmaxnmv s0, [[PG]], [[OP]].s ; VBITS_GE_2048-NEXT: ret %op = load <64 x float>, <64 x float>* %a - %res = call float @llvm.experimental.vector.reduce.fmax.v64f32(<64 x float> %op) + %res = call float @llvm.vector.reduce.fmax.v64f32(<64 x float> %op) ret float %res } @@ -153,7 +153,7 @@ define double @fmaxv_v1f64(<1 x double> %a) #0 { ; CHECK-LABEL: fmaxv_v1f64: ; CHECK-NOT: fmax ; CHECK: ret - %res = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a) + %res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a) ret double %res } @@ -162,7 +162,7 @@ define double @fmaxv_v2f64(<2 x double> %a) #0 { ; CHECK-LABEL: fmaxv_v2f64: ; CHECK: fmaxnmp d0, v0.2d ; CHECK-NEXT: ret - %res = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a) + %res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a) ret double %res } @@ -173,7 +173,7 @@ define double @fmaxv_v4f64(<4 x double>* %a) #0 { ; VBITS_GE_256-NEXT: fmaxnmv d0, [[PG]], [[OP]].d ; VBITS_GE_256-NEXT: ret %op = load <4 x double>, <4 x double>* %a - %res = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %op) + %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op) ret double %res } @@ -184,7 +184,7 @@ define double @fmaxv_v8f64(<8 x double>* %a) #0 { ; VBITS_GE_512-NEXT: fmaxnmv d0, [[PG]], [[OP]].d ; VBITS_GE_512-NEXT: ret %op = load <8 x double>, <8 x double>* %a - %res = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %op) + %res = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %op) ret double %res } @@ -195,7 +195,7 @@ define double @fmaxv_v16f64(<16 x double>* %a) #0 { ; VBITS_GE_1024-NEXT: fmaxnmv d0, [[PG]], [[OP]].d ; VBITS_GE_1024-NEXT: ret %op = load <16 x double>, <16 x double>* %a - %res = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %op) + %res = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %op) ret double %res } @@ -206,7 +206,7 @@ define double @fmaxv_v32f64(<32 x double>* %a) #0 { ; VBITS_GE_2048-NEXT: fmaxnmv d0, [[PG]], [[OP]].d ; VBITS_GE_2048-NEXT: ret %op = load <32 x double>, <32 x double>* %a - %res = call double @llvm.experimental.vector.reduce.fmax.v32f64(<32 x double> %op) + %res = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %op) ret double %res } @@ -219,7 +219,7 @@ define half @fminv_v4f16(<4 x half> %a) #0 { ; CHECK-LABEL: fminv_v4f16: ; CHECK: fminnmv h0, v0.4h ; CHECK-NEXT: ret - %res = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %a) + %res = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) ret half %res } @@ -228,7 +228,7 @@ define half @fminv_v8f16(<8 x half> %a) #0 { ; CHECK-LABEL: fminv_v8f16: ; CHECK: fminnmv h0, v0.8h ; CHECK-NEXT: ret - 
%res = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %a) + %res = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a) ret half %res } @@ -239,7 +239,7 @@ define half @fminv_v16f16(<16 x half>* %a) #0 { ; VBITS_GE_256-NEXT: fminnmv h0, [[PG]], [[OP]].h ; VBITS_GE_256-NEXT: ret %op = load <16 x half>, <16 x half>* %a - %res = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %op) + %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op) ret half %res } @@ -250,7 +250,7 @@ define half @fminv_v32f16(<32 x half>* %a) #0 { ; VBITS_GE_512-NEXT: fminnmv h0, [[PG]], [[OP]].h ; VBITS_GE_512-NEXT: ret %op = load <32 x half>, <32 x half>* %a - %res = call half @llvm.experimental.vector.reduce.fmin.v32f16(<32 x half> %op) + %res = call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %op) ret half %res } @@ -261,7 +261,7 @@ define half @fminv_v64f16(<64 x half>* %a) #0 { ; VBITS_GE_1024-NEXT: fminnmv h0, [[PG]], [[OP]].h ; VBITS_GE_1024-NEXT: ret %op = load <64 x half>, <64 x half>* %a - %res = call half @llvm.experimental.vector.reduce.fmin.v64f16(<64 x half> %op) + %res = call half @llvm.vector.reduce.fmin.v64f16(<64 x half> %op) ret half %res } @@ -272,7 +272,7 @@ define half @fminv_v128f16(<128 x half>* %a) #0 { ; VBITS_GE_2048-NEXT: fminnmv h0, [[PG]], [[OP]].h ; VBITS_GE_2048-NEXT: ret %op = load <128 x half>, <128 x half>* %a - %res = call half @llvm.experimental.vector.reduce.fmin.v128f16(<128 x half> %op) + %res = call half @llvm.vector.reduce.fmin.v128f16(<128 x half> %op) ret half %res } @@ -281,7 +281,7 @@ define float @fminv_v2f32(<2 x float> %a) #0 { ; CHECK-LABEL: fminv_v2f32: ; CHECK: fminnmp s0, v0.2s ; CHECK: ret - %res = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a) + %res = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a) ret float %res } @@ -290,7 +290,7 @@ define float @fminv_v4f32(<4 x float> %a) #0 { ; CHECK-LABEL: fminv_v4f32: ; CHECK: fminnmv s0, v0.4s ; CHECK: ret - %res = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a) + %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) ret float %res } @@ -301,7 +301,7 @@ define float @fminv_v8f32(<8 x float>* %a) #0 { ; VBITS_GE_256-NEXT: fminnmv s0, [[PG]], [[OP]].s ; VBITS_GE_256-NEXT: ret %op = load <8 x float>, <8 x float>* %a - %res = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %op) + %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op) ret float %res } @@ -312,7 +312,7 @@ define float @fminv_v16f32(<16 x float>* %a) #0 { ; VBITS_GE_512-NEXT: fminnmv s0, [[PG]], [[OP]].s ; VBITS_GE_512-NEXT: ret %op = load <16 x float>, <16 x float>* %a - %res = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %op) + %res = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %op) ret float %res } @@ -323,7 +323,7 @@ define float @fminv_v32f32(<32 x float>* %a) #0 { ; VBITS_GE_1024-NEXT: fminnmv s0, [[PG]], [[OP]].s ; VBITS_GE_1024-NEXT: ret %op = load <32 x float>, <32 x float>* %a - %res = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> %op) + %res = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %op) ret float %res } @@ -334,7 +334,7 @@ define float @fminv_v64f32(<64 x float>* %a) #0 { ; VBITS_GE_2048-NEXT: fminnmv s0, [[PG]], [[OP]].s ; VBITS_GE_2048-NEXT: ret %op = load <64 x float>, <64 x float>* %a - %res = call float @llvm.experimental.vector.reduce.fmin.v64f32(<64 x float> %op) + %res = call float @llvm.vector.reduce.fmin.v64f32(<64 x 
float> %op) ret float %res } @@ -343,7 +343,7 @@ define double @fminv_v1f64(<1 x double> %a) #0 { ; CHECK-LABEL: fminv_v1f64: ; CHECK-NOT: fmin ; CHECK: ret - %res = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a) + %res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a) ret double %res } @@ -352,7 +352,7 @@ define double @fminv_v2f64(<2 x double> %a) #0 { ; CHECK-LABEL: fminv_v2f64: ; CHECK: fminnmp d0, v0.2d ; CHECK-NEXT: ret - %res = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a) + %res = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a) ret double %res } @@ -363,7 +363,7 @@ define double @fminv_v4f64(<4 x double>* %a) #0 { ; VBITS_GE_256-NEXT: fminnmv d0, [[PG]], [[OP]].d ; VBITS_GE_256-NEXT: ret %op = load <4 x double>, <4 x double>* %a - %res = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %op) + %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op) ret double %res } @@ -374,7 +374,7 @@ define double @fminv_v8f64(<8 x double>* %a) #0 { ; VBITS_GE_512-NEXT: fminnmv d0, [[PG]], [[OP]].d ; VBITS_GE_512-NEXT: ret %op = load <8 x double>, <8 x double>* %a - %res = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %op) + %res = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %op) ret double %res } @@ -385,7 +385,7 @@ define double @fminv_v16f64(<16 x double>* %a) #0 { ; VBITS_GE_1024-NEXT: fminnmv d0, [[PG]], [[OP]].d ; VBITS_GE_1024-NEXT: ret %op = load <16 x double>, <16 x double>* %a - %res = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %op) + %res = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %op) ret double %res } @@ -396,50 +396,50 @@ define double @fminv_v32f64(<32 x double>* %a) #0 { ; VBITS_GE_2048-NEXT: fminnmv d0, [[PG]], [[OP]].d ; VBITS_GE_2048-NEXT: ret %op = load <32 x double>, <32 x double>* %a - %res = call double @llvm.experimental.vector.reduce.fmin.v32f64(<32 x double> %op) + %res = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %op) ret double %res } attributes #0 = { "target-features"="+sve" } -declare half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half>) -declare half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half>) -declare half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half>) -declare half @llvm.experimental.vector.reduce.fmax.v32f16(<32 x half>) -declare half @llvm.experimental.vector.reduce.fmax.v64f16(<64 x half>) -declare half @llvm.experimental.vector.reduce.fmax.v128f16(<128 x half>) - -declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v64f32(<64 x float>) - -declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v32f64(<32 x double>) - -declare half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half>) -declare half 
@llvm.experimental.vector.reduce.fmin.v8f16(<8 x half>) -declare half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half>) -declare half @llvm.experimental.vector.reduce.fmin.v32f16(<32 x half>) -declare half @llvm.experimental.vector.reduce.fmin.v64f16(<64 x half>) -declare half @llvm.experimental.vector.reduce.fmin.v128f16(<128 x half>) - -declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v64f32(<64 x float>) - -declare double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v32f64(<32 x double>) +declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>) +declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>) +declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>) +declare half @llvm.vector.reduce.fmax.v32f16(<32 x half>) +declare half @llvm.vector.reduce.fmax.v64f16(<64 x half>) +declare half @llvm.vector.reduce.fmax.v128f16(<128 x half>) + +declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) +declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>) +declare float @llvm.vector.reduce.fmax.v32f32(<32 x float>) +declare float @llvm.vector.reduce.fmax.v64f32(<64 x float>) + +declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>) +declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) +declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>) +declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>) +declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>) + +declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>) +declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>) +declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>) +declare half @llvm.vector.reduce.fmin.v32f16(<32 x half>) +declare half @llvm.vector.reduce.fmin.v64f16(<64 x half>) +declare half @llvm.vector.reduce.fmin.v128f16(<128 x half>) + +declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) +declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>) +declare float @llvm.vector.reduce.fmin.v32f32(<32 x float>) +declare float @llvm.vector.reduce.fmin.v64f32(<64 x float>) + +declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>) +declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) +declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>) +declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>) +declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll index 
633fe19efb026..4967f53d1dfe2 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll @@ -29,7 +29,7 @@ define i8 @uaddv_v8i8(<8 x i8> %a) #0 { ; CHECK-LABEL: uaddv_v8i8: ; CHECK: addv b0, v0.8b ; CHECK: ret - %res = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %a) + %res = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a) ret i8 %res } @@ -38,7 +38,7 @@ define i8 @uaddv_v16i8(<16 x i8> %a) #0 { ; CHECK-LABEL: uaddv_v16i8: ; CHECK: addv b0, v0.16b ; CHECK: ret - %res = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %a) + %res = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a) ret i8 %res } @@ -50,7 +50,7 @@ define i8 @uaddv_v32i8(<32 x i8>* %a) #0 { ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_256-NEXT: ret %op = load <32 x i8>, <32 x i8>* %a - %res = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> %op) + %res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op) ret i8 %res } @@ -72,7 +72,7 @@ define i8 @uaddv_v64i8(<64 x i8>* %a) #0 { ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret %op = load <64 x i8>, <64 x i8>* %a - %res = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> %op) + %res = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %op) ret i8 %res } @@ -84,7 +84,7 @@ define i8 @uaddv_v128i8(<128 x i8>* %a) #0 { ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_1024-NEXT: ret %op = load <128 x i8>, <128 x i8>* %a - %res = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> %op) + %res = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %op) ret i8 %res } @@ -96,7 +96,7 @@ define i8 @uaddv_v256i8(<256 x i8>* %a) #0 { ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_2048-NEXT: ret %op = load <256 x i8>, <256 x i8>* %a - %res = call i8 @llvm.experimental.vector.reduce.add.v256i8(<256 x i8> %op) + %res = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> %op) ret i8 %res } @@ -105,7 +105,7 @@ define i16 @uaddv_v4i16(<4 x i16> %a) #0 { ; CHECK-LABEL: uaddv_v4i16: ; CHECK: addv h0, v0.4h ; CHECK: ret - %res = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %a) + %res = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a) ret i16 %res } @@ -114,7 +114,7 @@ define i16 @uaddv_v8i16(<8 x i16> %a) #0 { ; CHECK-LABEL: uaddv_v8i16: ; CHECK: addv h0, v0.8h ; CHECK: ret - %res = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %a) + %res = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a) ret i16 %res } @@ -126,7 +126,7 @@ define i16 @uaddv_v16i16(<16 x i16>* %a) #0 { ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_256-NEXT: ret %op = load <16 x i16>, <16 x i16>* %a - %res = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %op) + %res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op) ret i16 %res } @@ -148,7 +148,7 @@ define i16 @uaddv_v32i16(<32 x i16>* %a) #0 { ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a - %res = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> %op) + %res = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %op) ret i16 %res } @@ -160,7 +160,7 @@ define i16 @uaddv_v64i16(<64 x i16>* %a) #0 { ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_1024-NEXT: ret %op = load <64 x i16>, <64 x i16>* %a - %res = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> %op) + %res = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> %op) ret i16 %res } @@ -172,7 +172,7 @@ define 
i16 @uaddv_v128i16(<128 x i16>* %a) #0 { ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_2048-NEXT: ret %op = load <128 x i16>, <128 x i16>* %a - %res = call i16 @llvm.experimental.vector.reduce.add.v128i16(<128 x i16> %op) + %res = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> %op) ret i16 %res } @@ -181,7 +181,7 @@ define i32 @uaddv_v2i32(<2 x i32> %a) #0 { ; CHECK-LABEL: uaddv_v2i32: ; CHECK: addp v0.2s, v0.2s ; CHECK: ret - %res = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> %a) + %res = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a) ret i32 %res } @@ -190,7 +190,7 @@ define i32 @uaddv_v4i32(<4 x i32> %a) #0 { ; CHECK-LABEL: uaddv_v4i32: ; CHECK: addv s0, v0.4s ; CHECK: ret - %res = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %a) + %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a) ret i32 %res } @@ -202,7 +202,7 @@ define i32 @uaddv_v8i32(<8 x i32>* %a) #0 { ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_256-NEXT: ret %op = load <8 x i32>, <8 x i32>* %a - %res = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %op) + %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op) ret i32 %res } @@ -224,7 +224,7 @@ define i32 @uaddv_v16i32(<16 x i32>* %a) #0 { ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a - %res = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %op) + %res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %op) ret i32 %res } @@ -236,7 +236,7 @@ define i32 @uaddv_v32i32(<32 x i32>* %a) #0 { ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_1024-NEXT: ret %op = load <32 x i32>, <32 x i32>* %a - %res = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> %op) + %res = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %op) ret i32 %res } @@ -248,7 +248,7 @@ define i32 @uaddv_v64i32(<64 x i32>* %a) #0 { ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_2048-NEXT: ret %op = load <64 x i32>, <64 x i32>* %a - %res = call i32 @llvm.experimental.vector.reduce.add.v64i32(<64 x i32> %op) + %res = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> %op) ret i32 %res } @@ -257,7 +257,7 @@ define i64 @uaddv_v1i64(<1 x i64> %a) #0 { ; CHECK-LABEL: uaddv_v1i64: ; CHECK: fmov x0, d0 ; CHECK: ret - %res = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> %a) + %res = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a) ret i64 %res } @@ -266,7 +266,7 @@ define i64 @uaddv_v2i64(<2 x i64> %a) #0 { ; CHECK-LABEL: uaddv_v2i64: ; CHECK: addp d0, v0.2d ; CHECK: ret - %res = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %a) + %res = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a) ret i64 %res } @@ -278,7 +278,7 @@ define i64 @uaddv_v4i64(<4 x i64>* %a) #0 { ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_256-NEXT: ret %op = load <4 x i64>, <4 x i64>* %a - %res = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %op) + %res = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %op) ret i64 %res } @@ -300,7 +300,7 @@ define i64 @uaddv_v8i64(<8 x i64>* %a) #0 { ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a - %res = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %op) + %res = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %op) ret i64 %res } @@ -312,7 +312,7 @@ define i64 @uaddv_v16i64(<16 x i64>* %a) #0 { ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] ; VBITS_GE_1024-NEXT: ret %op = load <16 x i64>, <16 x i64>* %a - 
%res = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %op)
   ret i64 %res
 }
 
@@ -324,7 +324,7 @@ define i64 @uaddv_v32i64(<32 x i64>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x i64>, <32 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.add.v32i64(<32 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> %op)
   ret i64 %res
 }
 
@@ -337,7 +337,7 @@ define i8 @smaxv_v8i8(<8 x i8> %a) #0 {
 ; CHECK-LABEL: smaxv_v8i8:
 ; CHECK: smaxv b0, v0.8b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a)
   ret i8 %res
 }
 
@@ -346,7 +346,7 @@ define i8 @smaxv_v16i8(<16 x i8> %a) #0 {
 ; CHECK-LABEL: smaxv_v16i8:
 ; CHECK: smaxv b0, v0.16b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a)
   ret i8 %res
 }
 
@@ -358,7 +358,7 @@ define i8 @smaxv_v32i8(<32 x i8>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <32 x i8>, <32 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %op)
   ret i8 %res
 }
 
@@ -380,7 +380,7 @@ define i8 @smaxv_v64i8(<64 x i8>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <64 x i8>, <64 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> %op)
   ret i8 %res
 }
 
@@ -392,7 +392,7 @@ define i8 @smaxv_v128i8(<128 x i8>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <128 x i8>, <128 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> %op)
   ret i8 %res
 }
 
@@ -404,7 +404,7 @@ define i8 @smaxv_v256i8(<256 x i8>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <256 x i8>, <256 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v256i8(<256 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smax.v256i8(<256 x i8> %op)
   ret i8 %res
 }
 
@@ -413,7 +413,7 @@ define i16 @smaxv_v4i16(<4 x i16> %a) #0 {
 ; CHECK-LABEL: smaxv_v4i16:
 ; CHECK: smaxv h0, v0.4h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a)
   ret i16 %res
 }
 
@@ -422,7 +422,7 @@ define i16 @smaxv_v8i16(<8 x i16> %a) #0 {
 ; CHECK-LABEL: smaxv_v8i16:
 ; CHECK: smaxv h0, v0.8h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a)
   ret i16 %res
 }
 
@@ -434,7 +434,7 @@ define i16 @smaxv_v16i16(<16 x i16>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x i16>, <16 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %op)
   ret i16 %res
 }
 
@@ -456,7 +456,7 @@ define i16 @smaxv_v32i16(<32 x i16>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <32 x i16>, <32 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> %op)
   ret i16 %res
 }
 
@@ -468,7 +468,7 @@ define i16 @smaxv_v64i16(<64 x i16>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x i16>, <64 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> %op)
   ret i16 %res
 }
 
@@ -480,7 +480,7 @@ define i16 @smaxv_v128i16(<128 x i16>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x i16>, <128 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v128i16(<128 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> %op)
   ret i16 %res
 }
 
@@ -489,7 +489,7 @@ define i32 @smaxv_v2i32(<2 x i32> %a) #0 {
 ; CHECK-LABEL: smaxv_v2i32:
 ; CHECK: smaxp v0.2s, v0.2s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a)
   ret i32 %res
 }
 
@@ -498,7 +498,7 @@ define i32 @smaxv_v4i32(<4 x i32> %a) #0 {
 ; CHECK-LABEL: smaxv_v4i32:
 ; CHECK: smaxv s0, v0.4s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
   ret i32 %res
 }
 
@@ -510,7 +510,7 @@ define i32 @smaxv_v8i32(<8 x i32>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x i32>, <8 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %op)
   ret i32 %res
 }
 
@@ -532,7 +532,7 @@ define i32 @smaxv_v16i32(<16 x i32>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <16 x i32>, <16 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %op)
   ret i32 %res
 }
 
@@ -544,7 +544,7 @@ define i32 @smaxv_v32i32(<32 x i32>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x i32>, <32 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> %op)
   ret i32 %res
 }
 
@@ -556,7 +556,7 @@ define i32 @smaxv_v64i32(<64 x i32>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x i32>, <64 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v64i32(<64 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> %op)
   ret i32 %res
 }
 
@@ -565,7 +565,7 @@ define i64 @smaxv_v1i64(<1 x i64> %a) #0 {
 ; CHECK-LABEL: smaxv_v1i64:
 ; CHECK: fmov x0, d0
 ; CHECK: ret
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> %a)
   ret i64 %res
 }
 
@@ -576,7 +576,7 @@ define i64 @smaxv_v2i64(<2 x i64> %a) #0 {
 ; CHECK-NEXT: smaxv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 ; CHECK-NEXT: ret
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a)
   ret i64 %res
 }
 
@@ -588,7 +588,7 @@ define i64 @smaxv_v4i64(<4 x i64>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x i64>, <4 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %op)
   ret i64 %res
 }
 
@@ -610,7 +610,7 @@ define i64 @smaxv_v8i64(<8 x i64>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <8 x i64>, <8 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %op)
   ret i64 %res
 }
 
@@ -622,7 +622,7 @@ define i64 @smaxv_v16i64(<16 x i64>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x i64>, <16 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> %op)
   ret i64 %res
 }
 
@@ -634,7 +634,7 @@ define i64 @smaxv_v32i64(<32 x i64>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x i64>, <32 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v32i64(<32 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> %op)
   ret i64 %res
 }
 
@@ -647,7 +647,7 @@ define i8 @sminv_v8i8(<8 x i8> %a) #0 {
 ; CHECK-LABEL: sminv_v8i8:
 ; CHECK: sminv b0, v0.8b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a)
   ret i8 %res
 }
 
@@ -656,7 +656,7 @@ define i8 @sminv_v16i8(<16 x i8> %a) #0 {
 ; CHECK-LABEL: sminv_v16i8:
 ; CHECK: sminv b0, v0.16b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a)
   ret i8 %res
 }
 
@@ -668,7 +668,7 @@ define i8 @sminv_v32i8(<32 x i8>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <32 x i8>, <32 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %op)
   ret i8 %res
 }
 
@@ -690,7 +690,7 @@ define i8 @sminv_v64i8(<64 x i8>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <64 x i8>, <64 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> %op)
   ret i8 %res
 }
 
@@ -702,7 +702,7 @@ define i8 @sminv_v128i8(<128 x i8>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <128 x i8>, <128 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> %op)
   ret i8 %res
 }
 
@@ -714,7 +714,7 @@ define i8 @sminv_v256i8(<256 x i8>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <256 x i8>, <256 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v256i8(<256 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smin.v256i8(<256 x i8> %op)
   ret i8 %res
 }
 
@@ -723,7 +723,7 @@ define i16 @sminv_v4i16(<4 x i16> %a) #0 {
 ; CHECK-LABEL: sminv_v4i16:
 ; CHECK: sminv h0, v0.4h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a)
   ret i16 %res
 }
 
@@ -732,7 +732,7 @@ define i16 @sminv_v8i16(<8 x i16> %a) #0 {
 ; CHECK-LABEL: sminv_v8i16:
 ; CHECK: sminv h0, v0.8h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a)
   ret i16 %res
 }
 
@@ -744,7 +744,7 @@ define i16 @sminv_v16i16(<16 x i16>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x i16>, <16 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %op)
   ret i16 %res
 }
 
@@ -766,7 +766,7 @@ define i16 @sminv_v32i16(<32 x i16>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <32 x i16>, <32 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> %op)
   ret i16 %res
 }
 
@@ -778,7 +778,7 @@ define i16 @sminv_v64i16(<64 x i16>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x i16>, <64 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> %op)
   ret i16 %res
 }
 
@@ -790,7 +790,7 @@ define i16 @sminv_v128i16(<128 x i16>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x i16>, <128 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v128i16(<128 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> %op)
   ret i16 %res
 }
 
@@ -799,7 +799,7 @@ define i32 @sminv_v2i32(<2 x i32> %a) #0 {
 ; CHECK-LABEL: sminv_v2i32:
 ; CHECK: minp v0.2s, v0.2s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a)
   ret i32 %res
 }
 
@@ -808,7 +808,7 @@ define i32 @sminv_v4i32(<4 x i32> %a) #0 {
 ; CHECK-LABEL: sminv_v4i32:
 ; CHECK: sminv s0, v0.4s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
   ret i32 %res
 }
 
@@ -820,7 +820,7 @@ define i32 @sminv_v8i32(<8 x i32>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x i32>, <8 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %op)
   ret i32 %res
 }
 
@@ -842,7 +842,7 @@ define i32 @sminv_v16i32(<16 x i32>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <16 x i32>, <16 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %op)
   ret i32 %res
 }
 
@@ -854,7 +854,7 @@ define i32 @sminv_v32i32(<32 x i32>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x i32>, <32 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> %op)
   ret i32 %res
 }
 
@@ -866,7 +866,7 @@ define i32 @sminv_v64i32(<64 x i32>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x i32>, <64 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v64i32(<64 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> %op)
   ret i32 %res
 }
 
@@ -875,7 +875,7 @@ define i64 @sminv_v1i64(<1 x i64> %a) #0 {
 ; CHECK-LABEL: sminv_v1i64:
 ; CHECK: fmov x0, d0
 ; CHECK: ret
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> %a)
   ret i64 %res
 }
 
@@ -886,7 +886,7 @@ define i64 @sminv_v2i64(<2 x i64> %a) #0 {
 ; CHECK-NEXT: sminv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 ; CHECK-NEXT: ret
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a)
   ret i64 %res
 }
 
@@ -898,7 +898,7 @@ define i64 @sminv_v4i64(<4 x i64>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x i64>, <4 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %op)
   ret i64 %res
 }
 
@@ -920,7 +920,7 @@ define i64 @sminv_v8i64(<8 x i64>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <8 x i64>, <8 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %op)
   ret i64 %res
 }
 
@@ -932,7 +932,7 @@ define i64 @sminv_v16i64(<16 x i64>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x i64>, <16 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> %op)
   ret i64 %res
 }
 
@@ -944,7 +944,7 @@ define i64 @sminv_v32i64(<32 x i64>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x i64>, <32 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v32i64(<32 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> %op)
   ret i64 %res
 }
 
@@ -957,7 +957,7 @@ define i8 @umaxv_v8i8(<8 x i8> %a) #0 {
 ; CHECK-LABEL: umaxv_v8i8:
 ; CHECK: umaxv b0, v0.8b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a)
   ret i8 %res
 }
 
@@ -966,7 +966,7 @@ define i8 @umaxv_v16i8(<16 x i8> %a) #0 {
 ; CHECK-LABEL: umaxv_v16i8:
 ; CHECK: umaxv b0, v0.16b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a)
   ret i8 %res
 }
 
@@ -978,7 +978,7 @@ define i8 @umaxv_v32i8(<32 x i8>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <32 x i8>, <32 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %op)
   ret i8 %res
 }
 
@@ -1000,7 +1000,7 @@ define i8 @umaxv_v64i8(<64 x i8>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <64 x i8>, <64 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> %op)
   ret i8 %res
 }
 
@@ -1012,7 +1012,7 @@ define i8 @umaxv_v128i8(<128 x i8>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <128 x i8>, <128 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> %op)
   ret i8 %res
 }
 
@@ -1024,7 +1024,7 @@ define i8 @umaxv_v256i8(<256 x i8>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <256 x i8>, <256 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v256i8(<256 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umax.v256i8(<256 x i8> %op)
   ret i8 %res
 }
 
@@ -1033,7 +1033,7 @@ define i16 @umaxv_v4i16(<4 x i16> %a) #0 {
 ; CHECK-LABEL: umaxv_v4i16:
 ; CHECK: umaxv h0, v0.4h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a)
   ret i16 %res
 }
 
@@ -1042,7 +1042,7 @@ define i16 @umaxv_v8i16(<8 x i16> %a) #0 {
 ; CHECK-LABEL: umaxv_v8i16:
 ; CHECK: umaxv h0, v0.8h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a)
   ret i16 %res
 }
 
@@ -1054,7 +1054,7 @@ define i16 @umaxv_v16i16(<16 x i16>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x i16>, <16 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %op)
   ret i16 %res
 }
 
@@ -1076,7 +1076,7 @@ define i16 @umaxv_v32i16(<32 x i16>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <32 x i16>, <32 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> %op)
   ret i16 %res
 }
 
@@ -1088,7 +1088,7 @@ define i16 @umaxv_v64i16(<64 x i16>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x i16>, <64 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> %op)
   ret i16 %res
 }
 
@@ -1100,7 +1100,7 @@ define i16 @umaxv_v128i16(<128 x i16>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x i16>, <128 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v128i16(<128 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> %op)
   ret i16 %res
 }
 
@@ -1109,7 +1109,7 @@ define i32 @umaxv_v2i32(<2 x i32> %a) #0 {
 ; CHECK-LABEL: umaxv_v2i32:
 ; CHECK: umaxp v0.2s, v0.2s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a)
   ret i32 %res
 }
 
@@ -1118,7 +1118,7 @@ define i32 @umaxv_v4i32(<4 x i32> %a) #0 {
 ; CHECK-LABEL: umaxv_v4i32:
 ; CHECK: umaxv s0, v0.4s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
   ret i32 %res
 }
 
@@ -1130,7 +1130,7 @@ define i32 @umaxv_v8i32(<8 x i32>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x i32>, <8 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %op)
   ret i32 %res
 }
 
@@ -1152,7 +1152,7 @@ define i32 @umaxv_v16i32(<16 x i32>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <16 x i32>, <16 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %op)
   ret i32 %res
 }
 
@@ -1164,7 +1164,7 @@ define i32 @umaxv_v32i32(<32 x i32>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x i32>, <32 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> %op)
   ret i32 %res
 }
 
@@ -1176,7 +1176,7 @@ define i32 @umaxv_v64i32(<64 x i32>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x i32>, <64 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v64i32(<64 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> %op)
   ret i32 %res
 }
 
@@ -1185,7 +1185,7 @@ define i64 @umaxv_v1i64(<1 x i64> %a) #0 {
 ; CHECK-LABEL: umaxv_v1i64:
 ; CHECK: fmov x0, d0
 ; CHECK: ret
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a)
   ret i64 %res
 }
 
@@ -1196,7 +1196,7 @@ define i64 @umaxv_v2i64(<2 x i64> %a) #0 {
 ; CHECK-NEXT: umaxv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 ; CHECK-NEXT: ret
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
   ret i64 %res
 }
 
@@ -1208,7 +1208,7 @@ define i64 @umaxv_v4i64(<4 x i64>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x i64>, <4 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %op)
   ret i64 %res
 }
 
@@ -1230,7 +1230,7 @@ define i64 @umaxv_v8i64(<8 x i64>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <8 x i64>, <8 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %op)
   ret i64 %res
 }
 
@@ -1242,7 +1242,7 @@ define i64 @umaxv_v16i64(<16 x i64>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x i64>, <16 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> %op)
   ret i64 %res
 }
 
@@ -1254,7 +1254,7 @@ define i64 @umaxv_v32i64(<32 x i64>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x i64>, <32 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v32i64(<32 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> %op)
   ret i64 %res
 }
 
@@ -1267,7 +1267,7 @@ define i8 @uminv_v8i8(<8 x i8> %a) #0 {
 ; CHECK-LABEL: uminv_v8i8:
 ; CHECK: uminv b0, v0.8b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a)
   ret i8 %res
 }
 
@@ -1276,7 +1276,7 @@ define i8 @uminv_v16i8(<16 x i8> %a) #0 {
 ; CHECK-LABEL: uminv_v16i8:
 ; CHECK: uminv b0, v0.16b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a)
   ret i8 %res
 }
 
@@ -1288,7 +1288,7 @@ define i8 @uminv_v32i8(<32 x i8>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <32 x i8>, <32 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %op)
   ret i8 %res
 }
 
@@ -1310,7 +1310,7 @@ define i8 @uminv_v64i8(<64 x i8>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <64 x i8>, <64 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> %op)
   ret i8 %res
 }
 
@@ -1322,7 +1322,7 @@ define i8 @uminv_v128i8(<128 x i8>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <128 x i8>, <128 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> %op)
   ret i8 %res
 }
 
@@ -1334,7 +1334,7 @@ define i8 @uminv_v256i8(<256 x i8>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <256 x i8>, <256 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v256i8(<256 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umin.v256i8(<256 x i8> %op)
   ret i8 %res
 }
 
@@ -1343,7 +1343,7 @@ define i16 @uminv_v4i16(<4 x i16> %a) #0 {
 ; CHECK-LABEL: uminv_v4i16:
 ; CHECK: uminv h0, v0.4h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a)
   ret i16 %res
 }
 
@@ -1352,7 +1352,7 @@ define i16 @uminv_v8i16(<8 x i16> %a) #0 {
 ; CHECK-LABEL: uminv_v8i16:
 ; CHECK: uminv h0, v0.8h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a)
   ret i16 %res
 }
 
@@ -1364,7 +1364,7 @@ define i16 @uminv_v16i16(<16 x i16>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x i16>, <16 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %op)
   ret i16 %res
 }
 
@@ -1386,7 +1386,7 @@ define i16 @uminv_v32i16(<32 x i16>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <32 x i16>, <32 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> %op)
   ret i16 %res
 }
 
@@ -1398,7 +1398,7 @@ define i16 @uminv_v64i16(<64 x i16>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x i16>, <64 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> %op)
   ret i16 %res
 }
 
@@ -1410,7 +1410,7 @@ define i16 @uminv_v128i16(<128 x i16>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x i16>, <128 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v128i16(<128 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> %op)
   ret i16 %res
 }
 
@@ -1419,7 +1419,7 @@ define i32 @uminv_v2i32(<2 x i32> %a) #0 {
 ; CHECK-LABEL: uminv_v2i32:
 ; CHECK: minp v0.2s, v0.2s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a)
   ret i32 %res
 }
 
@@ -1428,7 +1428,7 @@ define i32 @uminv_v4i32(<4 x i32> %a) #0 {
 ; CHECK-LABEL: uminv_v4i32:
 ; CHECK: uminv s0, v0.4s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
   ret i32 %res
 }
 
@@ -1440,7 +1440,7 @@ define i32 @uminv_v8i32(<8 x i32>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x i32>, <8 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %op)
   ret i32 %res
 }
 
@@ -1462,7 +1462,7 @@ define i32 @uminv_v16i32(<16 x i32>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <16 x i32>, <16 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %op)
   ret i32 %res
 }
 
@@ -1474,7 +1474,7 @@ define i32 @uminv_v32i32(<32 x i32>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x i32>, <32 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> %op)
   ret i32 %res
 }
 
@@ -1486,7 +1486,7 @@ define i32 @uminv_v64i32(<64 x i32>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x i32>, <64 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v64i32(<64 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> %op)
   ret i32 %res
 }
 
@@ -1495,7 +1495,7 @@ define i64 @uminv_v1i64(<1 x i64> %a) #0 {
 ; CHECK-LABEL: uminv_v1i64:
 ; CHECK: fmov x0, d0
 ; CHECK: ret
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> %a)
   ret i64 %res
 }
 
@@ -1506,7 +1506,7 @@ define i64 @uminv_v2i64(<2 x i64> %a) #0 {
 ; CHECK-NEXT: uminv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 ; CHECK-NEXT: ret
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a)
   ret i64 %res
 }
 
@@ -1518,7 +1518,7 @@ define i64 @uminv_v4i64(<4 x i64>* %a) #0 {
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x i64>, <4 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %op)
   ret i64 %res
 }
 
@@ -1540,7 +1540,7 @@ define i64 @uminv_v8i64(<8 x i64>* %a) #0 {
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <8 x i64>, <8 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %op)
   ret i64 %res
 }
 
@@ -1552,7 +1552,7 @@ define i64 @uminv_v16i64(<16 x i64>* %a) #0 {
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x i64>, <16 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> %op)
   ret i64 %res
 }
 
@@ -1564,148 +1564,148 @@ define i64 @uminv_v32i64(<32 x i64>* %a) #0 {
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x i64>, <32 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v32i64(<32 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> %op)
   ret i64 %res
 }
 
 attributes #0 = { "target-features"="+sve" }
 
-declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v256i8(<256 x i8>)
-
-declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v128i16(<128 x i16>)
-
-declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v64i32(<64 x i32>)
-
-declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v32i64(<32 x i64>)
-
-declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v256i8(<256 x i8>)
-
-declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v128i16(<128 x i16>)
-
-declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v64i32(<64 x i32>)
-
-declare i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v32i64(<32 x i64>)
-
-declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v256i8(<256 x i8>)
-
-declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v128i16(<128 x i16>)
-
-declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v64i32(<64 x i32>)
-
-declare i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v32i64(<32 x i64>)
-
-declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v256i8(<256 x i8>)
-
-declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v128i16(<128 x i16>)
-
-declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v64i32(<64 x i32>)
-
-declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v32i64(<32 x i64>)
-
-declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v256i8(<256 x i8>)
-
-declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v128i16(<128 x i16>)
-
-declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v64i32(<64 x i32>)
-
-declare i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v32i64(<32 x i64>)
+declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.add.v256i8(<256 x i8>)
+
+declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.add.v128i16(<128 x i16>)
+
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.add.v64i32(<64 x i32>)
+
+declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v32i64(<32 x i64>)
+
+declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.smax.v256i8(<256 x i8>)
+
+declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smax.v128i16(<128 x i16>)
+
+declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smax.v64i32(<64 x i32>)
+
+declare i64 @llvm.vector.reduce.smax.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smax.v32i64(<32 x i64>)
+
+declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.smin.v256i8(<256 x i8>)
+
+declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smin.v128i16(<128 x i16>)
+
+declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smin.v64i32(<64 x i32>)
+
+declare i64 @llvm.vector.reduce.smin.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smin.v32i64(<32 x i64>)
+
+declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.umax.v256i8(<256 x i8>)
+
+declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umax.v128i16(<128 x i16>)
+
+declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umax.v64i32(<64 x i32>)
+
+declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umax.v32i64(<32 x i64>)
+
+declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.umin.v256i8(<256 x i8>)
+
+declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umin.v128i16(<128 x i16>)
+
+declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umin.v64i32(<64 x i32>)
+
+declare i64 @llvm.vector.reduce.umin.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umin.v32i64(<32 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
index 2f899878363a5..36ec218e4e20c 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
@@ -1,28 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare i1 @llvm.experimental.vector.reduce.add.v1i1(<1 x i1> %a)
-declare i8 @llvm.experimental.vector.reduce.add.v1i8(<1 x i8> %a)
-declare i16 @llvm.experimental.vector.reduce.add.v1i16(<1 x i16> %a)
-declare i24 @llvm.experimental.vector.reduce.add.v1i24(<1 x i24> %a)
-declare i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> %a)
-declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> %a)
-declare i128 @llvm.experimental.vector.reduce.add.v1i128(<1 x i128> %a)
-
-declare i8 @llvm.experimental.vector.reduce.add.v3i8(<3 x i8> %a)
-declare i8 @llvm.experimental.vector.reduce.add.v9i8(<9 x i8> %a)
-declare i32 @llvm.experimental.vector.reduce.add.v3i32(<3 x i32> %a)
-declare i1 @llvm.experimental.vector.reduce.add.v4i1(<4 x i1> %a)
-declare i24 @llvm.experimental.vector.reduce.add.v4i24(<4 x i24> %a)
-declare i128 @llvm.experimental.vector.reduce.add.v2i128(<2 x i128> %a)
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a)
+declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %a)
+declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a)
+declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a)
+declare i24 @llvm.vector.reduce.add.v1i24(<1 x i24> %a)
+declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a)
+declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
+declare i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %a)
+
+declare i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %a)
+declare i8 @llvm.vector.reduce.add.v9i8(<9 x i8> %a)
+declare i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %a)
+declare i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %a)
+declare i24 @llvm.vector.reduce.add.v4i24(<4 x i24> %a)
+declare i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
 
 define i1 @test_v1i1(<1 x i1> %a) nounwind {
 ; CHECK-LABEL: test_v1i1:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: and w0, w0, #0x1
 ; CHECK-NEXT: ret
-  %b = call i1 @llvm.experimental.vector.reduce.add.v1i1(<1 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %a)
   ret i1 %b
 }
 
@@ -32,7 +32,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: umov w0, v0.b[0]
 ; CHECK-NEXT: ret
-  %b = call i8 @llvm.experimental.vector.reduce.add.v1i8(<1 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a)
   ret i8 %b
 }
 
@@ -42,7 +42,7 @@ define i16 @test_v1i16(<1 x i16> %a) nounwind {
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: umov w0, v0.h[0]
 ; CHECK-NEXT: ret
-  %b = call i16 @llvm.experimental.vector.reduce.add.v1i16(<1 x i16> %a)
+  %b = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a)
   ret i16 %b
 }
 
@@ -50,7 +50,7 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
 ; CHECK-LABEL: test_v1i24:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ret
-  %b = call i24 @llvm.experimental.vector.reduce.add.v1i24(<1 x i24> %a)
+  %b = call i24 @llvm.vector.reduce.add.v1i24(<1 x i24> %a)
   ret i24 %b
 }
 
@@ -60,7 +60,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
-  %b = call i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a)
   ret i32 %b
 }
 
@@ -70,7 +70,7 @@ define i64 @test_v1i64(<1 x i64> %a) nounwind {
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
-  %b = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> %a)
+  %b = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
   ret i64 %b
 }
 
@@ -78,7 +78,7 @@ define i128 @test_v1i128(<1 x i128> %a) nounwind {
 ; CHECK-LABEL: test_v1i128:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ret
-  %b = call i128 @llvm.experimental.vector.reduce.add.v1i128(<1 x i128> %a)
+  %b = call i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %a)
   ret i128 %b
 }
 
@@ -92,7 +92,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
 ; CHECK-NEXT: addv h0, v0.4h
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
-  %b = call i8 @llvm.experimental.vector.reduce.add.v3i8(<3 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %a)
   ret i8 %b
 }
 
@@ -109,7 +109,7 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
 ; CHECK-NEXT: addv b0, v0.16b
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
-  %b = call i8 @llvm.experimental.vector.reduce.add.v9i8(<9 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.add.v9i8(<9 x i8> %a)
   ret i8 %b
 }
 
@@ -120,7 +120,7 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
-  %b = call i32 @llvm.experimental.vector.reduce.add.v3i32(<3 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %a)
   ret i32 %b
 }
 
@@ -131,7 +131,7 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
 ; CHECK-NEXT: fmov w8, s0
 ; CHECK-NEXT: and w0, w8, #0x1
 ; CHECK-NEXT: ret
-  %b = call i1 @llvm.experimental.vector.reduce.add.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -141,7 +141,7 @@ define i24 @test_v4i24(<4 x i24> %a) nounwind {
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
-  %b = call i24 @llvm.experimental.vector.reduce.add.v4i24(<4 x i24> %a)
+  %b = call i24 @llvm.vector.reduce.add.v4i24(<4 x i24> %a)
   ret i24 %b
 }
 
@@ -151,7 +151,7 @@ define i128 @test_v2i128(<2 x i128> %a) nounwind {
 ; CHECK-NEXT: adds x0, x0, x2
 ; CHECK-NEXT: adcs x1, x1, x3
 ; CHECK-NEXT: ret
-  %b = call i128 @llvm.experimental.vector.reduce.add.v2i128(<2 x i128> %a)
+  %b = call i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
   ret i128 %b
 }
 
@@ -164,6 +164,6 @@ define i32 @test_v16i32(<16 x i32> %a) nounwind {
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
-  %b = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
   ret i32 %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
index 43b6f5290d1a2..99d55b33da352 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -1,28 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
-declare i8 @llvm.experimental.vector.reduce.and.v1i8(<1 x i8> %a)
-declare i16 @llvm.experimental.vector.reduce.and.v1i16(<1 x i16> %a)
-declare i24 @llvm.experimental.vector.reduce.and.v1i24(<1 x i24> %a)
-declare i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> %a)
-declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %a)
-declare i128 @llvm.experimental.vector.reduce.and.v1i128(<1 x i128> %a)
-
-declare i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> %a)
-declare i8 @llvm.experimental.vector.reduce.and.v9i8(<9 x i8> %a)
-declare i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
-declare i24 @llvm.experimental.vector.reduce.and.v4i24(<4 x i24> %a)
-declare i128 @llvm.experimental.vector.reduce.and.v2i128(<2 x i128> %a)
-declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a)
+declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
+declare i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
+declare i16 @llvm.vector.reduce.and.v1i16(<1 x i16> %a)
+declare i24 @llvm.vector.reduce.and.v1i24(<1 x i24> %a)
+declare i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a)
+declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)
+declare i128 @llvm.vector.reduce.and.v1i128(<1 x i128> %a)
+
+declare i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
+declare i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a)
+declare i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a)
+declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
+declare i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a)
+declare i128 @llvm.vector.reduce.and.v2i128(<2 x i128> %a)
+declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a)
 
 define i1 @test_v1i1(<1 x i1> %a) nounwind {
 ; CHECK-LABEL: test_v1i1:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: and w0, w0, #0x1
 ; CHECK-NEXT: ret
-  %b = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
   ret i1 %b
 }
 
@@ -32,7 +32,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: umov w0, v0.b[0]
 ; CHECK-NEXT: ret
-  %b = call i8 @llvm.experimental.vector.reduce.and.v1i8(<1 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
   ret i8 %b
 }
 
@@ -42,7 +42,7 @@ define i16 @test_v1i16(<1 x i16> %a) nounwind {
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: umov w0, v0.h[0]
 ; CHECK-NEXT: ret
-  %b = call i16 @llvm.experimental.vector.reduce.and.v1i16(<1 x i16> %a)
+  %b = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> %a)
   ret i16 %b
 }
 
@@ -50,7 +50,7 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
 ; CHECK-LABEL: test_v1i24:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ret
-  %b = call i24 @llvm.experimental.vector.reduce.and.v1i24(<1 x i24> %a)
+  %b = call i24 @llvm.vector.reduce.and.v1i24(<1 x i24> %a)
   ret i24 %b
 }
 
@@ -60,7 +60,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
-  %b = call i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a)
   ret i32 %b
 }
 
@@ -70,7 +70,7 @@ define i64 @test_v1i64(<1 x i64> %a) nounwind {
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
-  %b = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %a)
+  %b = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)
   ret i64 %b
 }
 
@@ -78,7 +78,7 @@ define i128 @test_v1i128(<1 x i128> %a) nounwind {
 ; CHECK-LABEL: test_v1i128:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ret
-  %b = call i128 @llvm.experimental.vector.reduce.and.v1i128(<1 x i128> %a)
+  %b = call i128 @llvm.vector.reduce.and.v1i128(<1 x i128> %a)
   ret i128 %b
 }
 
@@ -89,7 +89,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
 ; CHECK-NEXT: and w8, w8, w2
 ; CHECK-NEXT: and w0, w8, #0xff
 ; CHECK-NEXT: ret
-  %b = call i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
   ret i8 %b
 }
 
@@ -120,7 +120,7 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
 ; CHECK-NEXT: umov w9, v0.b[7]
 ; CHECK-NEXT: and w0, w8, w9
 ; CHECK-NEXT: ret
-  %b = call i8 @llvm.experimental.vector.reduce.and.v9i8(<9 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a)
   ret i8 %b
 }
 
@@ -133,7 +133,7 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
 ; CHECK-NEXT: fmov w9, s1
 ; CHECK-NEXT: and w0, w9, w8
 ; CHECK-NEXT: ret
-  %b = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a)
   ret i32 %b
 }
 
@@ -150,7 +150,7 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
 ; CHECK-NEXT: and w8, w9, w8
 ; CHECK-NEXT: and w0, w8, #0x1
 ; CHECK-NEXT: ret
-  %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -163,7 +163,7 @@ define i24 @test_v4i24(<4 x i24> %a) nounwind {
 ; CHECK-NEXT: fmov w9, s0
 ; CHECK-NEXT: and w0, w9, w8
 ; CHECK-NEXT: ret
-  %b = call i24 @llvm.experimental.vector.reduce.and.v4i24(<4 x i24> %a)
+  %b = call i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a)
   ret i24 %b
 }
 
@@ -173,7 +173,7 @@ define i128 @test_v2i128(<2 x i128> %a) nounwind {
 ; CHECK-NEXT: and x0, x0, x2
 ; CHECK-NEXT: and x1, x1, x3
 ; CHECK-NEXT: ret
-  %b = call i128 @llvm.experimental.vector.reduce.and.v2i128(<2 x i128> %a)
+  %b = call i128 @llvm.vector.reduce.and.v2i128(<2 x i128> %a)
   ret i128 %b
 }
 
@@ -189,6 +189,6 @@ define i32 @test_v16i32(<16 x i32> %a) nounwind {
 ; CHECK-NEXT: fmov w9, s0
 ; CHECK-NEXT: and w0, w9, w8
 ; CHECK-NEXT: ret
-  %b = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a)
   ret i32 %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
index e404159c7630c..1b5692c01332c 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -1,19 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
 
-declare i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
 
 define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-LABEL: reduce_and_v1:
@@ -24,7 +24,7 @@ define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, lt
 ; CHECK-NEXT: ret
   %x = icmp slt <1 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -41,7 +41,7 @@ define i32 @reduce_and_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, ne
 ; CHECK-NEXT: ret
   %x = icmp slt <2 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -58,7 +58,7 @@ define i32 @reduce_and_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, ne
 ; CHECK-NEXT: ret
   %x = icmp slt <4 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -73,7 +73,7 @@ define i32 @reduce_and_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, ne
 ; CHECK-NEXT: ret
   %x = icmp slt <8 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -88,7 +88,7 @@ define i32 @reduce_and_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, ne
 ; CHECK-NEXT: ret
   %x = icmp slt <16 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -105,7 +105,7 @@ define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, ne
 ; CHECK-NEXT: ret
   %x = icmp slt <32 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -119,7 +119,7 @@ define i32 @reduce_or_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, lt
 ; CHECK-NEXT: ret
   %x = icmp slt <1 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -136,7 +136,7 @@ define i32 @reduce_or_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, ne
 ; CHECK-NEXT: ret
   %x = icmp slt <2 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -153,7 +153,7 @@ define i32 @reduce_or_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, ne
 ; CHECK-NEXT: ret
   %x = icmp slt <4 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -168,7 +168,7 @@ define i32 @reduce_or_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, ne
 ; CHECK-NEXT: ret
   %x = icmp slt <8 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -183,7 +183,7 @@ define i32 @reduce_or_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, ne
 ; CHECK-NEXT: ret
   %x = icmp slt <16 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -200,7 +200,7 @@ define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-NEXT: csel w0, w0, w1, ne
 ; CHECK-NEXT: ret
   %x = icmp slt <32 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
index 5d6f2e40d4d5b..12c166490e87b 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
@@ -3,14 +3,14 @@
 
 ; Same as vecreduce-fadd-legalization.ll, but without fmf.
 
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half, <1 x half>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float, <1 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128, <1 x fp128>)
+declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
+declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
+declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)
 
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float, <3 x float>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
+declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
 
 define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-LABEL: test_v1f16:
@@ -20,7 +20,7 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-NEXT: fadd s0, s0, s1
 ; CHECK-NEXT: fcvt h0, s0
 ; CHECK-NEXT: ret
-  %b = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half 0.0, <1 x half> %a)
+  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
   ret half %b
 }
 
@@ -31,7 +31,7 @@ define float @test_v1f32(<1 x float> %a) nounwind {
 ; CHECK-NEXT: fmov s1, wzr
 ; CHECK-NEXT: fadd s0, s0, s1
 ; CHECK-NEXT: ret
-  %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float 0.0, <1 x float> %a)
+  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a)
   ret float %b
 }
 
@@ -41,7 +41,7 @@ define double @test_v1f64(<1 x double> %a) nounwind {
 ; CHECK-NEXT: fmov d1, xzr
 ; CHECK-NEXT: fadd d0, d0, d1
 ; CHECK-NEXT: ret
-  %b = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double 0.0, <1 x double> %a)
+  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a)
   ret double %b
 }
 
@@ -54,7 +54,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 ; CHECK-NEXT: bl __addtf3
 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
-  %b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
   ret fp128 %b
 }
 
@@ -68,7 +68,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-NEXT: mov s0, v0.s[2]
 ; CHECK-NEXT: fadd s0, s1, s0
 ; CHECK-NEXT: ret
-  %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float 0.0, <3 x float> %a)
+  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
   ret float %b
 }
 
@@ -86,7 +86,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT: add sp, sp, #32 // =32
 ; CHECK-NEXT: ret
-  %b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
   ret fp128 %b
 }
 
@@ -123,6 +123,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
 ; CHECK-NEXT: mov s1, v3.s[3]
 ; CHECK-NEXT: fadd s0, s0, s1
 ; CHECK-NEXT: ret
-  %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a)
+  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a)
   ret float %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
index 11686148421da..e098c08ce46c6 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
@@ -1,20 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half, <1 x half>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float, <1 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128, <1 x fp128>)
+declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
+declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
+declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)
 
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float, <3 x float>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
+declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
 
 define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-LABEL: test_v1f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ret
-  %b = call fast nnan half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half 0.0, <1 x half> %a)
+  %b = call fast nnan half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
   ret half %b
 }
 
@@ -24,7 +24,7 @@ define float @test_v1f32(<1 x float> %a) nounwind {
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT: ret
-  %b = call fast nnan float
@llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float 0.0, <1 x float> %a) + %b = call fast nnan float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a) ret float %b } @@ -32,7 +32,7 @@ define double @test_v1f64(<1 x double> %a) nounwind { ; CHECK-LABEL: test_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call fast nnan double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double 0.0, <1 x double> %a) + %b = call fast nnan double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a) ret double %b } @@ -40,7 +40,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { ; CHECK-LABEL: test_v1f128: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call fast nnan fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a) + %b = call fast nnan fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a) ret fp128 %b } @@ -53,7 +53,7 @@ define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: faddp s0, v0.2s ; CHECK-NEXT: ret - %b = call fast nnan float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float 0.0, <3 x float> %a) + %b = call fast nnan float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a) ret float %b } @@ -64,7 +64,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-NEXT: bl __addtf3 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %b = call fast nnan fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a) + %b = call fast nnan fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a) ret fp128 %b } @@ -78,6 +78,6 @@ define float @test_v16f32(<16 x float> %a) nounwind { ; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: faddp s0, v0.2s ; CHECK-NEXT: ret - %b = call fast nnan float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a) + %b = call fast nnan float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll index 90367377fb4a0..7c5e50a6ef116 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll @@ -14,7 +14,7 @@ define float @add_HalfS(<2 x float> %bin.rdx) { ; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECKNOFP16-NEXT: faddp s0, v0.2s ; CHECKNOFP16-NEXT: ret - %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float 0.0, <2 x float> %bin.rdx) + %r = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %bin.rdx) ret float %r } @@ -48,7 +48,7 @@ define half @add_HalfH(<4 x half> %bin.rdx) { ; CHECKNOFP16-NEXT: fadd s0, s0, s1 ; CHECKNOFP16-NEXT: fcvt h0, s0 ; CHECKNOFP16-NEXT: ret - %r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half 0.0, <4 x half> %bin.rdx) + %r = call fast half @llvm.vector.reduce.fadd.f16.v4f16(half 0.0, <4 x half> %bin.rdx) ret half %r } @@ -103,7 +103,7 @@ define half @add_H(<8 x half> %bin.rdx) { ; CHECKNOFP16-NEXT: fadd s0, s0, s1 ; CHECKNOFP16-NEXT: fcvt h0, s0 ; CHECKNOFP16-NEXT: ret - %r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half 0.0, <8 x half> %bin.rdx) + %r = call fast half @llvm.vector.reduce.fadd.f16.v8f16(half 0.0, <8 x half> %bin.rdx) ret half %r } @@ -121,7 +121,7 @@ define float @add_S(<4 x float> %bin.rdx) { ; CHECKNOFP16-NEXT: fadd v0.2s, v0.2s, v1.2s ; 
CHECKNOFP16-NEXT: faddp s0, v0.2s ; CHECKNOFP16-NEXT: ret - %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %bin.rdx) + %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %bin.rdx) ret float %r } @@ -135,7 +135,7 @@ define double @add_D(<2 x double> %bin.rdx) { ; CHECKNOFP16: // %bb.0: ; CHECKNOFP16-NEXT: faddp d0, v0.2d ; CHECKNOFP16-NEXT: ret - %r = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double 0.0, <2 x double> %bin.rdx) + %r = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %bin.rdx) ret double %r } @@ -229,7 +229,7 @@ define half @add_2H(<16 x half> %bin.rdx) { ; CHECKNOFP16-NEXT: fadd s0, s1, s0 ; CHECKNOFP16-NEXT: fcvt h0, s0 ; CHECKNOFP16-NEXT: ret - %r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half 0.0, <16 x half> %bin.rdx) + %r = call fast half @llvm.vector.reduce.fadd.f16.v16f16(half 0.0, <16 x half> %bin.rdx) ret half %r } @@ -249,7 +249,7 @@ define float @add_2S(<8 x float> %bin.rdx) { ; CHECKNOFP16-NEXT: fadd v0.2s, v0.2s, v1.2s ; CHECKNOFP16-NEXT: faddp s0, v0.2s ; CHECKNOFP16-NEXT: ret - %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %bin.rdx) + %r = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %bin.rdx) ret float %r } @@ -265,16 +265,16 @@ define double @add_2D(<4 x double> %bin.rdx) { ; CHECKNOFP16-NEXT: fadd v0.2d, v0.2d, v1.2d ; CHECKNOFP16-NEXT: faddp d0, v0.2d ; CHECKNOFP16-NEXT: ret - %r = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.0, <4 x double> %bin.rdx) + %r = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %bin.rdx) ret double %r } ; Function Attrs: nounwind readnone -declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half, <4 x half>) -declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half, <8 x half>) -declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half, <16 x half>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float, <2 x float>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>) +declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>) +declare half @llvm.vector.reduce.fadd.f16.v8f16(half, <8 x half>) +declare half @llvm.vector.reduce.fadd.f16.v16f16(half, <16 x half>) +declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>) +declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>) +declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>) +declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll index 514a43a5e171f..8d952a443c716 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll @@ -1,20 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s 
--check-prefix=CHECK -declare half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a) -declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a) -declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a) -declare fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a) +declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a) +declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) +declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a) +declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a) -declare float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a) -declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a) -declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a) +declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a) +declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a) +declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a) define half @test_v1f16(<1 x half> %a) nounwind { ; CHECK-LABEL: test_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a) + %b = call half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a) ret half %b } @@ -24,7 +24,7 @@ define float @test_v1f32(<1 x float> %a) nounwind { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: ret - %b = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a) + %b = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) ret float %b } @@ -32,7 +32,7 @@ define double @test_v1f64(<1 x double> %a) nounwind { ; CHECK-LABEL: test_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a) + %b = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a) ret double %b } @@ -40,14 +40,14 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { ; CHECK-LABEL: test_v1f128: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a) + %b = call fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a) ret fp128 %b } ; TODO: This doesn't work, because ExpandReductions only supports power of two ; unordered reductions. 
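; As a sketch of the power-of-two expansion that does work (illustrative
; names and instruction choice, not taken from this test): for <4 x float>,
; ExpandReductions can halve the vector log2(n) times with shuffles, e.g.
;   %h = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;   %c = fcmp fast ogt <4 x float> %a, %h
;   %m = select <4 x i1> %c, <4 x float> %a, <4 x float> %h
; then repeat with a <i32 1, ...> mask and extract lane 0. A <3 x float>
; input has no such halving sequence, hence the disabled test below.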
;define float @test_v3f32(<3 x float> %a) nounwind { -; %b = call float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a) +; %b = call float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a) ; ret float %b ;} @@ -55,7 +55,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-LABEL: test_v2f128: ; CHECK: // %bb.0: ; CHECK-NEXT: b fmaxl - %b = call fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a) + %b = call fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a) ret fp128 %b } @@ -67,6 +67,6 @@ define float @test_v16f32(<16 x float> %a) nounwind { ; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s ; CHECK-NEXT: fmaxnmv s0, v0.4s ; CHECK-NEXT: ret - %b = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a) + %b = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll index 89cee4f0a0600..f1ebd8fa85ead 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -1,20 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK -declare half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a) -declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a) -declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a) -declare fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a) +declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a) +declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) +declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a) +declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a) -declare float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a) -declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a) -declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a) +declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a) +declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a) +declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a) define half @test_v1f16(<1 x half> %a) nounwind { ; CHECK-LABEL: test_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call nnan half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a) + %b = call nnan half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a) ret half %b } @@ -24,7 +24,7 @@ define float @test_v1f32(<1 x float> %a) nounwind { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: ret - %b = call nnan float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a) + %b = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) ret float %b } @@ -32,7 +32,7 @@ define double @test_v1f64(<1 x double> %a) nounwind { ; CHECK-LABEL: test_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call nnan double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a) + %b = call nnan double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a) ret double %b } @@ -40,7 +40,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { ; CHECK-LABEL: test_v1f128: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a) + %b = call nnan fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a) ret fp128 %b } @@ 
-52,7 +52,7 @@ define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-NEXT: mov v0.s[3], v1.s[0] ; CHECK-NEXT: fmaxnmv s0, v0.4s ; CHECK-NEXT: ret - %b = call nnan float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a) + %b = call nnan float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a) ret float %b } @@ -64,7 +64,7 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind { ; CHECK-NEXT: mov v0.s[3], v1.s[0] ; CHECK-NEXT: fmaxnmv s0, v0.4s ; CHECK-NEXT: ret - %b = call nnan ninf float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a) + %b = call nnan ninf float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a) ret float %b } @@ -72,7 +72,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-LABEL: test_v2f128: ; CHECK: // %bb.0: ; CHECK-NEXT: b fmaxl - %b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a) + %b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a) ret fp128 %b } @@ -84,6 +84,6 @@ define float @test_v16f32(<16 x float> %a) nounwind { ; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s ; CHECK-NEXT: fmaxnmv s0, v0.4s ; CHECK-NEXT: ret - %b = call nnan float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a) + %b = call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll index bb2d6b75bcd2a..4129fa80b13e6 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -1,20 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK -declare half @llvm.experimental.vector.reduce.fmin.v1f16(<1 x half> %a) -declare float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a) -declare double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a) -declare fp128 @llvm.experimental.vector.reduce.fmin.v1f128(<1 x fp128> %a) +declare half @llvm.vector.reduce.fmin.v1f16(<1 x half> %a) +declare float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a) +declare double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a) +declare fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a) -declare float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a) -declare fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a) -declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a) +declare float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a) +declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a) +declare float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a) define half @test_v1f16(<1 x half> %a) nounwind { ; CHECK-LABEL: test_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call nnan half @llvm.experimental.vector.reduce.fmin.v1f16(<1 x half> %a) + %b = call nnan half @llvm.vector.reduce.fmin.v1f16(<1 x half> %a) ret half %b } @@ -24,7 +24,7 @@ define float @test_v1f32(<1 x float> %a) nounwind { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: ret - %b = call nnan float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a) + %b = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a) ret float %b } @@ -32,7 +32,7 @@ define double @test_v1f64(<1 x double> %a) nounwind { ; CHECK-LABEL: test_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call nnan 
double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a) + %b = call nnan double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a) ret double %b } @@ -40,7 +40,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { ; CHECK-LABEL: test_v1f128: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call nnan fp128 @llvm.experimental.vector.reduce.fmin.v1f128(<1 x fp128> %a) + %b = call nnan fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a) ret fp128 %b } @@ -52,7 +52,7 @@ define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-NEXT: mov v0.s[3], v1.s[0] ; CHECK-NEXT: fminnmv s0, v0.4s ; CHECK-NEXT: ret - %b = call nnan float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a) + %b = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a) ret float %b } @@ -64,7 +64,7 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind { ; CHECK-NEXT: mov v0.s[3], v1.s[0] ; CHECK-NEXT: fminnmv s0, v0.4s ; CHECK-NEXT: ret - %b = call nnan ninf float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a) + %b = call nnan ninf float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a) ret float %b } @@ -72,7 +72,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-LABEL: test_v2f128: ; CHECK: // %bb.0: ; CHECK-NEXT: b fminl - %b = call nnan fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a) + %b = call nnan fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a) ret fp128 %b } @@ -84,6 +84,6 @@ define float @test_v16f32(<16 x float> %a) nounwind { ; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s ; CHECK-NEXT: fminnmv s0, v0.4s ; CHECK-NEXT: ret - %b = call nnan float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a) + %b = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll index 41914ca73afad..be661127f066c 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll @@ -3,14 +3,14 @@ ; Same as vecreduce-fmul-legalization.ll, but without fmf. 
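; Without fast-math flags the fmul reduction is ordered: it must be lowered
; as a serial chain that folds the start value into lane 0 and multiplies
; through the remaining lanes in order, roughly (a sketch with illustrative
; names, not part of this test):
;   %m0 = fmul float %start, %e0
;   %m1 = fmul float %m0, %e1
;   %m2 = fmul float %m1, %e2
; which is why the CHECK lines below expect one scalar fmul per element
; rather than a shuffle tree.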
-declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half, <1 x half>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float, <1 x float>) -declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double, <1 x double>) -declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128, <1 x fp128>) +declare half @llvm.vector.reduce.fmul.f16.v1f16(half, <1 x half>) +declare float @llvm.vector.reduce.fmul.f32.v1f32(float, <1 x float>) +declare double @llvm.vector.reduce.fmul.f64.v1f64(double, <1 x double>) +declare fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128, <1 x fp128>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float, <3 x float>) -declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128, <2 x fp128>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float, <16 x float>) +declare float @llvm.vector.reduce.fmul.f32.v3f32(float, <3 x float>) +declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>) +declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>) define half @test_v1f16(<1 x half> %a) nounwind { ; CHECK-LABEL: test_v1f16: @@ -20,7 +20,7 @@ define half @test_v1f16(<1 x half> %a) nounwind { ; CHECK-NEXT: fmul s0, s0, s1 ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: ret - %b = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half 0.0, <1 x half> %a) + %b = call half @llvm.vector.reduce.fmul.f16.v1f16(half 0.0, <1 x half> %a) ret half %b } @@ -31,7 +31,7 @@ define float @test_v1f32(<1 x float> %a) nounwind { ; CHECK-NEXT: fmov s1, wzr ; CHECK-NEXT: fmul s0, s1, v0.s[0] ; CHECK-NEXT: ret - %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float 0.0, <1 x float> %a) + %b = call float @llvm.vector.reduce.fmul.f32.v1f32(float 0.0, <1 x float> %a) ret float %b } @@ -41,7 +41,7 @@ define double @test_v1f64(<1 x double> %a) nounwind { ; CHECK-NEXT: fmov d1, xzr ; CHECK-NEXT: fmul d0, d0, d1 ; CHECK-NEXT: ret - %b = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double 0.0, <1 x double> %a) + %b = call double @llvm.vector.reduce.fmul.f64.v1f64(double 0.0, <1 x double> %a) ret double %b } @@ -54,7 +54,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { ; CHECK-NEXT: bl __multf3 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a) + %b = call fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a) ret fp128 %b } @@ -66,7 +66,7 @@ define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-NEXT: fmul s1, s1, v0.s[1] ; CHECK-NEXT: fmul s0, s1, v0.s[2] ; CHECK-NEXT: ret - %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float 0.0, <3 x float> %a) + %b = call float @llvm.vector.reduce.fmul.f32.v3f32(float 0.0, <3 x float> %a) ret float %b } @@ -84,7 +84,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 // =32 ; CHECK-NEXT: ret - %b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a) + %b = call fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a) ret fp128 %b } @@ -109,6 +109,6 @@ define float @test_v16f32(<16 x float> %a) nounwind { ; CHECK-NEXT: fmul s0, s0, v3.s[2] ; CHECK-NEXT: fmul s0, s0, v3.s[3] ; CHECK-NEXT: ret - %b = call float 
@llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 0.0, <16 x float> %a) + %b = call float @llvm.vector.reduce.fmul.f32.v16f32(float 0.0, <16 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll b/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll index cdb557d1a3948..5675fd7e59c9f 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll @@ -24,8 +24,8 @@ entry: %1 = insertelement <4 x double> %0, double 1.0, i32 1 %2 = insertelement <4 x double> %1, double 1.0, i32 2 %3 = insertelement <4 x double> %2, double 1.0, i32 3 - %4 = call nnan reassoc double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %3) + %4 = call nnan reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> %3) ret double %4 } -declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>) +declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll index 7c9415211681c..811745a1ee411 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll @@ -1,29 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK -declare i1 @llvm.experimental.vector.reduce.umax.v1i1(<1 x i1> %a) -declare i8 @llvm.experimental.vector.reduce.umax.v1i8(<1 x i8> %a) -declare i16 @llvm.experimental.vector.reduce.umax.v1i16(<1 x i16> %a) -declare i24 @llvm.experimental.vector.reduce.umax.v1i24(<1 x i24> %a) -declare i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a) -declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a) -declare i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a) - -declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a) -declare i8 @llvm.experimental.vector.reduce.umax.v3i8(<3 x i8> %a) -declare i8 @llvm.experimental.vector.reduce.umax.v9i8(<9 x i8> %a) -declare i32 @llvm.experimental.vector.reduce.umax.v3i32(<3 x i32> %a) -declare i1 @llvm.experimental.vector.reduce.umax.v4i1(<4 x i1> %a) -declare i24 @llvm.experimental.vector.reduce.umax.v4i24(<4 x i24> %a) -declare i128 @llvm.experimental.vector.reduce.umax.v2i128(<2 x i128> %a) -declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a) +declare i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %a) +declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a) +declare i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> %a) +declare i24 @llvm.vector.reduce.umax.v1i24(<1 x i24> %a) +declare i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a) +declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a) +declare i128 @llvm.vector.reduce.umax.v1i128(<1 x i128> %a) + +declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a) +declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a) +declare i8 @llvm.vector.reduce.umax.v9i8(<9 x i8> %a) +declare i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a) +declare i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a) +declare i24 @llvm.vector.reduce.umax.v4i24(<4 x i24> %a) +declare i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a) +declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %a) define i1 @test_v1i1(<1 x i1> %a) nounwind { ; CHECK-LABEL: test_v1i1: ; CHECK: // %bb.0: ; CHECK-NEXT: and w0, w0, #0x1 ; CHECK-NEXT: ret - %b = call 
i1 @llvm.experimental.vector.reduce.umax.v1i1(<1 x i1> %a) + %b = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %a) ret i1 %b } @@ -33,7 +33,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w0, v0.b[0] ; CHECK-NEXT: ret - %b = call i8 @llvm.experimental.vector.reduce.umax.v1i8(<1 x i8> %a) + %b = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a) ret i8 %b } @@ -43,7 +43,7 @@ define i16 @test_v1i16(<1 x i16> %a) nounwind { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w0, v0.h[0] ; CHECK-NEXT: ret - %b = call i16 @llvm.experimental.vector.reduce.umax.v1i16(<1 x i16> %a) + %b = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> %a) ret i16 %b } @@ -51,7 +51,7 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind { ; CHECK-LABEL: test_v1i24: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call i24 @llvm.experimental.vector.reduce.umax.v1i24(<1 x i24> %a) + %b = call i24 @llvm.vector.reduce.umax.v1i24(<1 x i24> %a) ret i24 %b } @@ -61,7 +61,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret - %b = call i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a) + %b = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a) ret i32 %b } @@ -71,7 +71,7 @@ define i64 @test_v1i64(<1 x i64> %a) nounwind { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret - %b = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a) + %b = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a) ret i64 %b } @@ -79,7 +79,7 @@ define i128 @test_v1i128(<1 x i128> %a) nounwind { ; CHECK-LABEL: test_v1i128: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %b = call i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a) + %b = call i128 @llvm.vector.reduce.umax.v1i128(<1 x i128> %a) ret i128 %b } @@ -92,7 +92,7 @@ define i64 @test_v2i64(<2 x i64> %a) nounwind { ; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: csel x0, x9, x8, hi ; CHECK-NEXT: ret - %b = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a) + %b = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a) ret i64 %b } @@ -107,7 +107,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind { ; CHECK-NEXT: umaxv h0, v0.4h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret - %b = call i8 @llvm.experimental.vector.reduce.umax.v3i8(<3 x i8> %a) + %b = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a) ret i8 %b } @@ -124,7 +124,7 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind { ; CHECK-NEXT: umaxv b0, v0.16b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret - %b = call i8 @llvm.experimental.vector.reduce.umax.v9i8(<9 x i8> %a) + %b = call i8 @llvm.vector.reduce.umax.v9i8(<9 x i8> %a) ret i8 %b } @@ -135,7 +135,7 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind { ; CHECK-NEXT: umaxv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret - %b = call i32 @llvm.experimental.vector.reduce.umax.v3i32(<3 x i32> %a) + %b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a) ret i32 %b } @@ -148,7 +148,7 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret - %b = call i1 @llvm.experimental.vector.reduce.umax.v4i1(<4 x i1> %a) + %b = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a) ret i1 %b } @@ -159,7 +159,7 @@ define i24 @test_v4i24(<4 x i24> %a) nounwind { ; CHECK-NEXT: umaxv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret - %b = call i24 
@llvm.experimental.vector.reduce.umax.v4i24(<4 x i24> %a) + %b = call i24 @llvm.vector.reduce.umax.v4i24(<4 x i24> %a) ret i24 %b } @@ -173,7 +173,7 @@ define i128 @test_v2i128(<2 x i128> %a) nounwind { ; CHECK-NEXT: csel x0, x8, x9, eq ; CHECK-NEXT: csel x1, x1, x3, hi ; CHECK-NEXT: ret - %b = call i128 @llvm.experimental.vector.reduce.umax.v2i128(<2 x i128> %a) + %b = call i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a) ret i128 %b } @@ -186,6 +186,6 @@ define i32 @test_v16i32(<16 x i32> %a) nounwind { ; CHECK-NEXT: umaxv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret - %b = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a) + %b = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %a) ret i32 %b } diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll index aaa376a0ba6e9..9e3d7e7775b4a 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK -declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half, <4 x half>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>) -declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>) +declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>) +declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>) +declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>) +declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>) define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-LABEL: test_v4f16: @@ -37,7 +37,7 @@ define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half 0.0, <4 x half> %a) + %b = call fast half @llvm.vector.reduce.fadd.f16.v4f16(half 0.0, <4 x half> %a) ret half %b } @@ -55,7 +55,7 @@ define float @test_v4f32(<4 x float> %a) nounwind { ; CHECK-NEXT: bl __aeabi_fadd ; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a) + %b = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a) ret float %b } @@ -67,7 +67,7 @@ define double @test_v2f64(<2 x double> %a) nounwind { ; CHECK-NEXT: bl __aeabi_dadd ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double zeroinitializer, <2 x double> %a) + %b = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double zeroinitializer, <2 x double> %a) ret double %b } @@ -90,6 +90,6 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a) + %b = call fast fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a) ret fp128 %b } diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll 
b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll index 39673bb2d786e..83073b895b2c1 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon | FileCheck %s --check-prefix=CHECK -declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half, <1 x half>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float, <1 x float>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>) -declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128, <1 x fp128>) +declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>) +declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>) +declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>) +declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float, <3 x float>) -declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>) +declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>) +declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>) +declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>) define half @test_v1f16(<1 x half> %a) nounwind { ; CHECK-LABEL: test_v1f16: @@ -28,7 +28,7 @@ define half @test_v1f16(<1 x half> %a) nounwind { ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI0_0: ; CHECK-NEXT: .long 0x00000000 @ float 0 - %b = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half 0.0, <1 x half> %a) + %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a) ret half %b } @@ -44,7 +44,7 @@ define float @test_v1f32(<1 x float> %a) nounwind { ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI1_0: ; CHECK-NEXT: .long 0x00000000 @ float 0 - %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float 0.0, <1 x float> %a) + %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a) ret float %b } @@ -56,7 +56,7 @@ define double @test_v1f64(<1 x double> %a) nounwind { ; CHECK-NEXT: vadd.f64 d16, d17, d16 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: mov pc, lr - %b = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double 0.0, <1 x double> %a) + %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a) ret double %b } @@ -76,7 +76,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a) + %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a) ret fp128 %b } @@ -95,7 +95,7 @@ define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI4_0: ; CHECK-NEXT: .long 0x00000000 @ float 0 - %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float 0.0, <3 x float> %a) + %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a) ret float %b } @@ -124,7 +124,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call 
fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a) + %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a) ret fp128 %b } @@ -162,6 +162,6 @@ define float @test_v16f32(<16 x float> %a) nounwind { ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI6_0: ; CHECK-NEXT: .long 0x00000000 @ float 0 - %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a) + %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll index 586a02b92bf3c..48968ee7ba771 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK -declare half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half>) -declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) -declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>) -declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128>) +declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>) +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) +declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) +declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128>) define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-LABEL: test_v4f16: @@ -37,7 +37,7 @@ define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %a) + %b = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) ret half %b } @@ -55,7 +55,7 @@ define float @test_v4f32(<4 x float> %a) nounwind { ; CHECK-NEXT: bl fmaxf ; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a) + %b = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) ret float %b } @@ -67,7 +67,7 @@ define double @test_v2f64(<2 x double> %a) nounwind { ; CHECK-NEXT: bl fmax ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a) + %b = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a) ret double %b } @@ -90,6 +90,6 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a) + %b = call fast fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a) ret fp128 %b } diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll index b64e4473981bb..1252085ef948d 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK -declare half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half>) 
-declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) -declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>) -declare fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128>) +declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>) +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) +declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) +declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128>) define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-LABEL: test_v4f16: @@ -37,7 +37,7 @@ define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %a) + %b = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) ret half %b } @@ -55,7 +55,7 @@ define float @test_v4f32(<4 x float> %a) nounwind { ; CHECK-NEXT: bl fminf ; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a) + %b = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) ret float %b } @@ -67,7 +67,7 @@ define double @test_v2f64(<2 x double> %a) nounwind { ; CHECK-NEXT: bl fmin ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a) + %b = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a) ret double %b } @@ -90,6 +90,6 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a) + %b = call fast fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a) ret fp128 %b } diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll index 62111e5f0f342..4217eab6ba70a 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK -declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half, <4 x half>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>) -declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double, <2 x double>) -declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128, <2 x fp128>) +declare half @llvm.vector.reduce.fmul.f16.v4f16(half, <4 x half>) +declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>) +declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>) +declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>) define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-LABEL: test_v4f16: @@ -37,7 +37,7 @@ define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half 1.0, <4 x half> %a) + %b = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %a) ret half %b } @@ -55,7 +55,7 @@ define float @test_v4f32(<4 x float> %a) nounwind { ; CHECK-NEXT: bl __aeabi_fmul ; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov 
pc, lr - %b = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a) + %b = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a) ret float %b } @@ -67,7 +67,7 @@ define double @test_v2f64(<2 x double> %a) nounwind { ; CHECK-NEXT: bl __aeabi_dmul ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double 1.0, <2 x double> %a) + %b = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a) ret double %b } @@ -90,6 +90,6 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fast fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a) + %b = call fast fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a) ret fp128 %b } diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll index 003b64be09a63..b3d23addad418 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon | FileCheck %s --check-prefix=CHECK -declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half, <1 x half>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float, <1 x float>) -declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double, <1 x double>) -declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128, <1 x fp128>) +declare half @llvm.vector.reduce.fmul.f16.v1f16(half, <1 x half>) +declare float @llvm.vector.reduce.fmul.f32.v1f32(float, <1 x float>) +declare double @llvm.vector.reduce.fmul.f64.v1f64(double, <1 x double>) +declare fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128, <1 x fp128>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float, <3 x float>) -declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128, <2 x fp128>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float, <16 x float>) +declare float @llvm.vector.reduce.fmul.f32.v3f32(float, <3 x float>) +declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>) +declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>) define half @test_v1f16(<1 x half> %a) nounwind { ; CHECK-LABEL: test_v1f16: @@ -28,7 +28,7 @@ define half @test_v1f16(<1 x half> %a) nounwind { ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI0_0: ; CHECK-NEXT: .long 0x00000000 @ float 0 - %b = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half 0.0, <1 x half> %a) + %b = call half @llvm.vector.reduce.fmul.f16.v1f16(half 0.0, <1 x half> %a) ret half %b } @@ -44,7 +44,7 @@ define float @test_v1f32(<1 x float> %a) nounwind { ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI1_0: ; CHECK-NEXT: .long 0x00000000 @ float 0 - %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float 0.0, <1 x float> %a) + %b = call float @llvm.vector.reduce.fmul.f32.v1f32(float 0.0, <1 x float> %a) ret float %b } @@ -56,7 +56,7 @@ define double @test_v1f64(<1 x double> %a) nounwind { ; CHECK-NEXT: vmul.f64 d16, d17, d16 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: mov pc, lr - %b = call double 
@llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double 0.0, <1 x double> %a) + %b = call double @llvm.vector.reduce.fmul.f64.v1f64(double 0.0, <1 x double> %a) ret double %b } @@ -76,7 +76,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a) + %b = call fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a) ret fp128 %b } @@ -95,7 +95,7 @@ define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI4_0: ; CHECK-NEXT: .long 0x00000000 @ float 0 - %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float 0.0, <3 x float> %a) + %b = call float @llvm.vector.reduce.fmul.f32.v3f32(float 0.0, <3 x float> %a) ret float %b } @@ -124,7 +124,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr - %b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a) + %b = call fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a) ret fp128 %b } @@ -162,6 +162,6 @@ define float @test_v16f32(<16 x float> %a) nounwind { ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI6_0: ; CHECK-NEXT: .long 0x00000000 @ float 0 - %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 0.0, <16 x float> %a) + %b = call float @llvm.vector.reduce.fmul.f32.v16f32(float 0.0, <16 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll index e0e3149e35119..99aa181bd70f8 100644 --- a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll +++ b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll @@ -1,24 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -expand-reductions -S | FileCheck %s ; Tests without a target which should expand all reductions -declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>) -declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umin.v2i64(<2 
 
-declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
 
-declare i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8>)
+declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>)
 
 define i64 @add_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @add_i64(
@@ -29,7 +29,7 @@ define i64 @add_i64(<2 x i64> %vec) {
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -42,7 +42,7 @@ define i64 @mul_i64(<2 x i64> %vec) {
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -55,7 +55,7 @@ define i64 @and_i64(<2 x i64> %vec) {
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -68,7 +68,7 @@ define i64 @or_i64(<2 x i64> %vec) {
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -81,7 +81,7 @@ define i64 @xor_i64(<2 x i64> %vec) {
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -97,7 +97,7 @@ define float @fadd_f32(<4 x float> %vec) {
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
+  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
   ret float %r
 }
 
@@ -113,7 +113,7 @@ define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
+  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
   ret float %r
 }
 
@@ -131,7 +131,7 @@ define float @fadd_f32_strict(<4 x float> %vec) {
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float undef, <4 x float> %vec)
+  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
   ret float %r
 }
 
@@ -149,7 +149,7 @@ define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) {
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
+  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
   ret float %r
 }
 
@@ -165,7 +165,7 @@ define float @fmul_f32(<4 x float> %vec) {
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
+  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
   ret float %r
 }
 
@@ -181,7 +181,7 @@ define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
+  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
   ret float %r
 }
 
@@ -199,7 +199,7 @@ define float @fmul_f32_strict(<4 x float> %vec) {
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float undef, <4 x float> %vec)
+  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
   ret float %r
 }
 
@@ -217,7 +217,7 @@ define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) {
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
+  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
   ret float %r
 }
 
@@ -231,7 +231,7 @@ define i64 @smax_i64(<2 x i64> %vec) {
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -245,7 +245,7 @@ define i64 @smin_i64(<2 x i64> %vec) {
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -259,7 +259,7 @@ define i64 @umax_i64(<2 x i64> %vec) {
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -273,7 +273,7 @@ define i64 @umin_i64(<2 x i64> %vec) {
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -282,11 +282,11 @@ entry:
 
 define double @fmax_f64(<2 x double> %vec) {
 ; CHECK-LABEL: @fmax_f64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[R:%.*]] = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
 ; CHECK-NEXT:    ret double [[R]]
 ;
 entry:
-  %r = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %vec)
+  %r = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %vec)
   ret double %r
 }
 
@@ -295,11 +295,11 @@ entry:
 
 define double @fmin_f64(<2 x double> %vec) {
 ; CHECK-LABEL: @fmin_f64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[R:%.*]] = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
 ; CHECK-NEXT:    ret double [[R]]
 ;
 entry:
-  %r = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %vec)
+  %r = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %vec)
   ret double %r
 }
 
@@ -309,10 +309,10 @@ entry:
 define i8 @test_v3i8(<3 x i8> %a) nounwind {
 ; CHECK-LABEL: @test_v3i8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B:%.*]] = call i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
 ; CHECK-NEXT:    ret i8 [[B]]
 ;
 entry:
-  %b = call i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8> %a)
   ret i8 %b
 }
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
index 77e35ded50c13..5bc82d493abb8 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
@@ -44,7 +44,7 @@
   %add7 = add <4 x i32> %mul, %splat.output
   %max = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %add7, <4 x i32> %.splat.i42, i32 1, <4 x i1> %pred, <4 x i32> undef)
   %min = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %max, <4 x i32> %.splat.i, i32 1, <4 x i1> %pred, <4 x i32> undef)
-  %reduce = tail call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %min)
+  %reduce = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %min)
   store i32 %reduce, i32* %scevgep2
   %add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 4
   %add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 4
@@ -62,7 +62,7 @@
   declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1
   declare i1 @llvm.test.set.loop.iterations.i32(i32) #4
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #4
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #5
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #5
 
 ...
 ---
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
index 522cce49f75a1..29ecf00c556f0 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
@@ -85,7 +85,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %tmp8 = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
-  %tmp9 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp8)
+  %tmp9 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp8)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -188,7 +188,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %acc = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
-  %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %acc)
+  %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %acc)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -287,7 +287,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %acc = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
-  %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %acc)
+  %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %acc)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -386,7 +386,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %acc = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
-  %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %acc)
+  %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %acc)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -528,6 +528,6 @@ declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i
 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
 
 ; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
index f27a98c3837f0..ab3c866b015d7 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
@@ -56,7 +56,7 @@
   br i1 %tmp16, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %tmp17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp14)
+  %tmp17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp14)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -64,7 +64,7 @@
   ret i32 %res.0.lcssa
   }
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
index 5db355a86f23a..b796712aa6ac0 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
@@ -58,7 +58,7 @@
   br i1 %tmp16, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %tmp17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp14)
+  %tmp17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp14)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -66,7 +66,7 @@
   ret i32 %res.0.lcssa
   }
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
index ab2ffb50c6f8c..2b354005535ed 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
@@ -68,7 +68,7 @@
   %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
   %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
   %tmp12 = mul nsw <4 x i32> %pass, %tmp10
-  %tmp13 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp12)
+  %tmp13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp12)
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
   %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
   %tmp16 = icmp ne i32 %tmp15, 0
@@ -105,7 +105,7 @@
   %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
   %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
   %tmp12 = add nsw <4 x i32> %pass, %tmp10
-  %tmp13 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp12)
+  %tmp13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp12)
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
   %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
   %tmp16 = icmp ne i32 %tmp15, 0
@@ -117,7 +117,7 @@
   ret i32 %res
   }
 
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
   declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
   declare void @llvm.set.loop.iterations.i32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
index e5131fd4e1b41..cdf53b8666ee2 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
@@ -40,7 +40,7 @@
   br i1 %15, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %16 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
+  %16 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -88,7 +88,7 @@
   br i1 %15, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %16 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
+  %16 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -98,7 +98,7 @@
 
   declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
   declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
-  declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
+  declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
   declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
index 886fbe7f0b17c..f9d1abbee9e54 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
@@ -91,7 +91,7 @@
   %22 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %10)
   %23 = bitcast i16* %lsr.iv7 to i1*
   %24 = select <4 x i1> %22, <4 x i32> %.lcssa, <4 x i32> %vec.phi.lcssa
-  %25 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %24)
+  %25 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %24)
   %sunkaddr = mul i32 %i.064.us, 4
   %26 = bitcast i32* %e to i8*
   %sunkaddr17 = getelementptr inbounds i8, i8* %26, i32 %sunkaddr
@@ -141,7 +141,7 @@
   }
   declare dso_local arm_aapcs_vfpcc signext i16 @crc16(...) local_unnamed_addr #0
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
index 2b90065ea0e80..5c3af352782b0 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -69,7 +69,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
-  %8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
+  %8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -145,7 +145,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
-  %8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
+  %8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -221,7 +221,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
-  %8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
+  %8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -297,7 +297,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
-  %8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
+  %8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -371,7 +371,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %6 = select <4 x i1> %1, <4 x i32> %4, <4 x i32> %vec.phi
-  %7 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %6)
+  %7 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %6)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -1273,6 +1273,6 @@ declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
 declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
index e5bcf2e6077f7..c797e0401d4f2 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
@@ -51,7 +51,7 @@ define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readon
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP17:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP14]], <4 x i32> [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP17]])
+; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP17]])
 ; CHECK-NEXT:    store i32 [[TMP18]], i32* [[ARRAYIDX8_US]], align 4
 ; CHECK-NEXT:    [[INC10_US]] = add nuw i32 [[I_025_US]], 1
 ; CHECK-NEXT:    [[EXITCOND27:%.*]] = icmp eq i32 [[INC10_US]], [[N]]
@@ -112,7 +112,7 @@ vector.body: ; preds = %vector.body, %for.c
 
 middle.block: ; preds = %vector.body
   %tmp17 = select <4 x i1> %tmp7, <4 x i32> %tmp14, <4 x i32> %vec.phi
-  %tmp18 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp17)
+  %tmp18 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp17)
   store i32 %tmp18, i32* %arrayidx8.us, align 4
   %inc10.us = add nuw i32 %i.025.us, 1
   %exitcond27 = icmp eq i32 %inc10.us, %N
@@ -170,7 +170,7 @@ define void @mat_vec_i32(i32** nocapture readonly %A, i32* nocapture readonly %B
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP15:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP12]], <4 x i32> [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP15]])
+; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP15]])
 ; CHECK-NEXT:    store i32 [[TMP16]], i32* [[ARRAYIDX7_US]], align 4
 ; CHECK-NEXT:    [[INC9_US]] = add nuw i32 [[I_024_US]], 1
 ; CHECK-NEXT:    [[EXITCOND26:%.*]] = icmp eq i32 [[INC9_US]], [[N]]
@@ -229,7 +229,7 @@ vector.body: ; preds = %vector.body, %for.c
 
 middle.block: ; preds = %vector.body
   %tmp15 = select <4 x i1> %tmp7, <4 x i32> %tmp12, <4 x i32> %vec.phi
-  %tmp16 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp15)
+  %tmp16 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp15)
   store i32 %tmp16, i32* %arrayidx7.us, align 4
   %inc9.us = add nuw i32 %i.024.us, 1
   %exitcond26 = icmp eq i32 %inc9.us, %N
@@ -247,7 +247,7 @@ declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i
 declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #0
 
 ; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #1
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #1
 
 ; Function Attrs: noduplicate nounwind
 declare void @llvm.set.loop.iterations.i32(i32) #2
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
index 9eb95d7e8072c..4a5f48331090e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
@@ -40,7 +40,7 @@
   br i1 %tmp15, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %tmp16 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp13)
+  %tmp16 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp13)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -48,7 +48,7 @@
   ret i32 %res.0.lcssa
   }
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
index 65f9cc3176b1e..c27a6c32f5b31 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
@@ -44,7 +44,7 @@
   %.lcssa = phi <16 x i8> [ %13, %vector.body ]
   %16 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %7)
   %17 = select <16 x i1> %16, <16 x i8> %.lcssa, <16 x i8> %vec.phi.lcssa
-  %18 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %17)
+  %18 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %17)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -53,7 +53,7 @@
   }
 
   declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #1
-  declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) #2
+  declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <16 x i1> @llvm.arm.mve.vctp8(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir
index 966bdc913c46a..3a098f272cc0f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir
@@ -36,7 +36,7 @@
   br i1 %cmp, label %for.body, label %middle.block
 
   middle.block: ; preds = %for.body
-  %reduce = tail call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %acc.next)
+  %reduce = tail call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %acc.next)
   ret i16 %reduce
 
   for.cond.cleanup: ; preds = %entry
@@ -47,7 +47,7 @@
   declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) #2
   declare i1 @llvm.test.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
-  declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) #4
+  declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #4
   declare <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1
 
 ...
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir
index f013cb2f86156..c3655baeb8b1c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir
@@ -41,7 +41,7 @@
   br i1 %16, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
+  %17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -88,7 +88,7 @@
   br i1 %16, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
+  %17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -135,7 +135,7 @@
   br i1 %16, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
+  %17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -182,7 +182,7 @@
   br i1 %16, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
+  %17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -228,7 +228,7 @@
   br i1 %14, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %15 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %12)
+  %15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %12)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -274,7 +274,7 @@
   br i1 %14, label %vector.body, label %middle.block
 
   middle.block: ; preds = %vector.body
-  %15 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %12)
+  %15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %12)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -285,7 +285,7 @@
   declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
   declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
   declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index a0cdb822b370f..f9116634a4d51 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -45,7 +45,7 @@ vector.body: ; preds = %vector.body, %vecto
   %wide.masked.load16 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
   %i4 = add <16 x i8> %wide.masked.load, %wide.masked.load16
   %i5 = select <16 x i1> %active.lane.mask, <16 x i8> %i4, <16 x i8> %vec.phi
-  %i6 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %i5)
+  %i6 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %i5)
   %index.next = add i32 %index, 16
   %i7 = icmp eq i32 %index.next, %n.vec
   br i1 %i7, label %middle.block, label %vector.body
@@ -123,7 +123,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %i9 = select <8 x i1> %active.lane.mask, <8 x i16> %i7, <8 x i16> %vec.phi
-  %i10 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %i9)
+  %i10 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %i9)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -193,7 +193,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %i7 = select <16 x i1> %active.lane.mask, <16 x i8> %i5, <16 x i8> %vec.phi
-  %i8 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %i7)
+  %i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %i7)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -265,7 +265,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %i9 = select <8 x i1> %active.lane.mask, <8 x i16> %i7, <8 x i16> %vec.phi
-  %i10 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %i9)
+  %i10 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %i9)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -335,7 +335,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %i7 = select <16 x i1> %active.lane.mask, <16 x i8> %i5, <16 x i8> %vec.phi
-  %i8 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %i7)
+  %i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %i7)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -407,7 +407,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %i9 = select <8 x i1> %active.lane.mask, <8 x i16> %i7, <8 x i16> %vec.phi
-  %i10 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %i9)
+  %i10 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %i9)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -504,7 +504,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %i9 = select <4 x i1> %active.lane.mask, <4 x i32> %i7, <4 x i32> %vec.phi
-  %i10 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %i9)
+  %i10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %i9)
   br i1 %cmp35, label %for.cond.cleanup7, label %vector.ph47
 
 vector.ph47: ; preds = %middle.block
@@ -534,7 +534,7 @@ vector.body46: ; preds = %vector.body46, %vec
 
 middle.block44: ; preds = %vector.body46
   %i21 = select <4 x i1> %active.lane.mask61, <4 x i32> %i19, <4 x i32> %vec.phi60
-  %i22 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %i21)
+  %i22 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %i21)
   br label %for.cond.cleanup7
 
 for.cond.cleanup7: ; preds = %middle.block44, %middle.block, %entry
@@ -620,9 +620,9 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %i11 = select <8 x i1> %active.lane.mask, <8 x i16> %i8, <8 x i16> %vec.phi
-  %i12 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %i11)
+  %i12 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %i11)
   %i13 = select <8 x i1> %active.lane.mask, <8 x i16> %i9, <8 x i16> %vec.phi.1
-  %i14 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %i13)
+  %i14 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %i13)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -747,7 +747,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %10 = select <4 x i1> %active.lane.mask, <4 x i32> %8, <4 x i32> %vec.phi
-  %11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %10)
+  %11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %10)
   br label %for.end
 
 for.end: ; preds = %middle.block, %lor.end
@@ -758,10 +758,10 @@ for.end: ; preds = %middle.block, %lor.
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
 declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
 declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
 declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
 declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
index 17acf67a112bd..497f0412589b2 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
@@ -46,7 +46,7 @@
   %.lcssa = phi <4 x i32> [ %15, %vector.body ], !dbg !38
   %18 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %9), !dbg !34
   %19 = select <4 x i1> %18, <4 x i32> %.lcssa, <4 x i32> %vec.phi.lcssa, !dbg !38
-  %20 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %19), !dbg !32
+  %20 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %19), !dbg !32
   br label %for.cond.cleanup, !dbg !42
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -58,7 +58,7 @@
   declare void @llvm.dbg.value(metadata, metadata, metadata)
 
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
index 338c980eeb9b0..d786209ad3fb0 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
@@ -258,7 +258,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %19 = select <4 x i1> %active.lane.mask, <4 x i32> %16, <4 x i32> %vec.phi
-  %20 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %19)
+  %20 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %19)
   br label %for.end
 
 for.end: ; preds = %middle.block, %for.body
@@ -282,6 +282,6 @@ declare void @llvm.set.loop.iterations.i32(i32)
 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
 declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
index 26be5328027a6..6d140589287d8 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
@@ -74,14 +74,14 @@ vector.body: ; preds = %vector.body, %entry
   br i1 %8, label %middle.block, label %vector.body
 
 middle.block: ; preds = %vector.body
-  %9 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %7)
-  %10 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %5)
+  %9 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %7)
+  %10 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %5)
   store i32 %10, i32* %minp, align 4
   ret i32 %9
 }
 
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #1
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
-declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) #3
-declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) #3
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) #3
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) #3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
index 1f212c9e3aa3f..dec54006c7d96 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
@@ -26,7 +26,7 @@
   %tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7)
   %tmp9 = sub i32 %tmp7, 8
   %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
-  %min = tail call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %wide.masked.load)
+  %min = tail call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %wide.masked.load)
   store i16 %min, i16* %lsr.iv.2
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
   %scevgep.2 = getelementptr i16, i16* %lsr.iv.2, i32 1
@@ -43,7 +43,7 @@
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <8 x i1> @llvm.arm.mve.vctp16(i32)
-  declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
+  declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
 
 ...
 ---
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
index cd8310c1004b7..1d9f7d72877e5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
@@ -26,7 +26,7 @@
   %tmp9 = sub i32 %tmp7, 4
   %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
   %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
-  %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp10)
+  %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp10)
   store i32 %tmp11, i32* %store.addr
   %store.next = getelementptr i32, i32* %store.addr, i32 1
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
@@ -64,7 +64,7 @@
   %tmp9 = sub i32 %tmp7, 8
   %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
   %sext = sext <8 x i16> %wide.masked.load to <8 x i32>
-  %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %sext)
+  %tmp11 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %sext)
   store i32 %tmp11, i32* %store.addr
   %store.next = getelementptr i32, i32* %store.addr, i32 1
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
@@ -102,7 +102,7 @@
   %tmp9 = sub i32 %tmp7, 16
   %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
   %sext = sext <16 x i8> %wide.masked.load to <16 x i32>
-  %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %sext)
+  %tmp11 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %sext)
   store i32 %tmp11, i32* %store.addr
   %store.next = getelementptr i32, i32* %store.addr, i32 1
   %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
@@ -140,7 +140,7 @@
   %tmp9 = sub i32 %tmp7, 4
   %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
   %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
-  %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp10)
+  %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp10)
   %acc.next = add i32 %tmp11, %acc
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
   %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
@@ -179,7 +179,7 @@
   %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
   %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
   %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
-  %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %not)
+  %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
   store i32 %tmp11, i32* %store.addr
   %store.next = getelementptr i32, i32* %store.addr, i32 1
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
@@ -218,7 +218,7 @@
   %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
   %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
   %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
-  %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %not)
+  %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
   %acc.next = add i32 %tmp11, %acc
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
   %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
@@ -257,7 +257,7 @@
   %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
   %tmp10 = zext <4 x i16> %wide.masked.load to <4 x i32>
   %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
-  %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %not)
+  %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
   store i32 %tmp11, i32* %store.addr
   %store.next = getelementptr i32, i32* %store.addr, i32 1
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
@@ -296,7 +296,7 @@
   %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
   %tmp10 = zext <4 x i16> %wide.masked.load to <4 x i32>
   %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
-  %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %not)
+  %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
   %acc.next = add i32 %tmp11, %acc
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
   %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
@@ -335,7 +335,7 @@
   %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef)
   %sext.wide = sext <8 x i8> %wide.masked.load to <8 x i16>
   %sub = sub <8 x i16> %sext.wide, %pass
-  %reduce = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %sub)
+  %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
   %sext.reduce = sext i16 %reduce to i32
   store i32 %sext.reduce, i32* %store.addr
   %store.next = getelementptr i32, i32* %store.addr, i32 1
@@ -375,7 +375,7 @@
   %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef)
   %sext.wide = sext <8 x i8> %wide.masked.load to <8 x i16>
   %sub = sub <8 x i16> %sext.wide, %pass
-  %reduce = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %sub)
+  %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
   %sext.reduce = sext i16 %reduce to i32
   %acc.next = add i32 %sext.reduce, %acc
   %scevgep = getelementptr i8, i8* %lsr.iv, i32 8
@@ -414,7 +414,7 @@
   %tmp9 = sub i32 %tmp7, 8
   %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
   %sub = sub <8 x i16> %wide.masked.load, %pass
-  %reduce = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %sub)
+  %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
   %zext.reduce = zext i16 %reduce to i32
   store i32 %zext.reduce, i32* %store.addr
   %store.next = getelementptr i32, i32* %store.addr, i32 1
@@ -453,7 +453,7 @@
   %tmp9 = sub i32 %tmp7, 8
   %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
   %sub = sub <8 x i16> %wide.masked.load, %pass
-  %reduce = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %sub)
+  %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
   %zext.reduce = zext i16 %reduce to i32
   %acc.next = add i32 %zext.reduce, %acc
   %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
@@ -492,7 +492,7 @@
   %tmp9 = sub i32 %tmp7, 16
   %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
   %xor = xor <16 x i8> %wide.masked.load, %pass
-  %reduce = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %xor)
+  %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
   %sext.reduce = sext i8 %reduce to i32
   store i32 %sext.reduce, i32* %store.addr
   %store.next = getelementptr i32, i32* %store.addr, i32 1
@@ -531,7 +531,7 @@
   %tmp9 = sub i32 %tmp7, 16
   %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
   %xor = xor <16 x i8> %wide.masked.load, %pass
-  %reduce = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %xor)
+  %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
   %sext.reduce = sext i8 %reduce to i32
   %acc.next = add i32 %sext.reduce, %acc
   %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
@@ -570,7 +570,7 @@
   %tmp9 = sub i32 %tmp7, 16
   %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
   %xor = xor <16 x i8> %wide.masked.load, %pass
-  %reduce = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %xor)
+  %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
   %zext.reduce = zext i8 %reduce to i32
   store i32 %zext.reduce, i32* %store.addr
   %store.next = getelementptr i32, i32* %store.addr, i32 1
@@ -609,7 +609,7 @@
   %tmp9 = sub i32 %tmp7, 16
   %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
   %xor = xor <16 x i8> %wide.masked.load, %pass
-  %reduce = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %xor)
+  %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
   %zext.reduce = zext i8 %reduce to i32
   %acc.next = add i32 %zext.reduce, %acc
   %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
@@ -652,7 +652,7 @@
   %tmp4 = tail call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tmp3, i32 2, <4 x i1> %tmp, <4 x i16> zeroinitializer)
   %zext.wide.2 = zext <4 x i16> %tmp4 to <4 x i32>
   %or = or <4 x i32> %zext.wide.1, %zext.wide.2
-  %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %or)
+  %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %or)
   %acc.next = add i32 %reduce, %acc
   %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 4
   %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 4
@@ -693,7 +693,7 @@
   %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
   %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
   %or = or <8 x i16> %tmp2, %tmp4
-  %reduce = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %or)
+  %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %or)
   %zext.reduce = zext i16 %reduce to i32
   %acc.next = add i32 %zext.reduce, %acc
   %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8
@@ -737,7 +737,7 @@
   %tmp5 = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 1)
   %tmp6 = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 0)
   %mul = add <4 x i32> %tmp5, %tmp6
-  %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %mul)
+  %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %mul)
   %acc.next = add i32 %reduce, %acc
   %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8
   %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 8
@@ -778,7 +778,7 @@
   %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
   %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
   %mul = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 1)
-  %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %mul)
+  %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %mul)
   %acc.next = add i32 %reduce, %acc
   %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8
   %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 8
@@ -798,11 +798,11 @@
   declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
   declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
   declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-  declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
-  declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
-  declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
-  declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+  declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+  declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+  declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16>, <8 x i16>, i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
index 15aed3bd4e17a..7aa772c6394b6 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
@@ -214,7 +214,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %i19 = select <4 x i1> %active.lane.mask, <4 x i32> %i16, <4 x i32> %vec.phi
-  %i20 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %i19)
+  %i20 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %i19)
   br label %for.end
 
 for.end: ; preds = %middle.block, %for.body
@@ -235,6 +235,6 @@ for.end17: ; preds = %for.end, %entry
 
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
 declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
 declare void @llvm.set.loop.iterations.i32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir
index 4f80869de3ccb..4308c7e50edaa 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir
@@ -47,7 +47,7 @@
   %15 = add i32 %8, 4
   %16 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %15)
   %17 = select <4 x i1> %16, <4 x i32> %12, <4 x i32> %vec.phi
-  %18 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %17)
+  %18 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %17)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -55,7 +55,7 @@
   ret i32 %res.0.lcssa
   }
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir
index a42c33e24f26b..9799ceb98c676 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir
@@ -46,7 +46,7 @@
   %.lcssa = phi <8 x i16> [ %15, %vector.body ]
   %18 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %7)
   %19 = select <8 x i1> %18, <8 x i16> %.lcssa, <8 x i16> %vec.phi.lcssa
-  %20 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %19)
+  %20 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %19)
   br label %for.cond.cleanup
 
   for.cond.cleanup: ; preds = %middle.block, %entry
@@ -54,7 +54,7 @@
   ret i16 %a.0.lcssa
   }
   declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
-  declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+  declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <8 x i1> @llvm.arm.mve.vctp16(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
index 6628df20f2024..422fc3c874da9 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
@@ -70,7 +70,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %8 = select <4 x i1> %1, <4 x i32> %6, <4 x i32> %vec.phi
-  %9 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %8)
+  %9 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %8)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -141,7 +141,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %5 = select <4 x i1> %1, <4 x i32> %3, <4 x i32> %vec.phi
-  %6 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %5)
+  %6 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %5)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -212,7 +212,7 @@ vector.body: ; preds = %vector.body, %vecto
 
 middle.block: ; preds = %vector.body
   %5 = select <4 x i1> %1, <4 x i32> %3, <4 x i32> %vec.phi
-  %6 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %5)
+  %6 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %5)
   br label %for.cond.cleanup
 
 for.cond.cleanup: ; preds = %middle.block, %entry
@@ -459,7 +459,7 @@ declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i
 declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
 declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
 declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
index 64e7552b92b36..c05ed7dcfcfb0 100644
64e7552b92b36..c05ed7dcfcfb0 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll @@ -16,7 +16,7 @@ ; CHECK: middle.block: ; CHECK: [[VPSEL:%[^ ]+]] = select <4 x i1> [[VCTP]], -; CHECK: call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[VPSEL]]) +; CHECK: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VPSEL]]) define i32 @vec_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { entry: @@ -64,7 +64,7 @@ vector.body: ; preds = %vector.body, %vecto middle.block: ; preds = %vector.body %12 = select <4 x i1> %7, <4 x i32> %9, <4 x i32> %vec.phi - %13 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %12) + %13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %12) br label %for.cond.cleanup for.cond.cleanup: ; preds = %middle.block, %entry @@ -73,7 +73,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr } declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare void @llvm.set.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir index 23cdf73263b01..07c136ec7c379 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir @@ -118,7 +118,7 @@ middle.block: ; preds = %vector.body %8 = call <4 x i1> @llvm.arm.vctp32(i32 %5) %tmp8 = select <4 x i1> %8, <4 x i32> %tmp6, <4 x i32> %vec.phi - %tmp9 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp8) + %tmp9 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp8) br label %for.cond.cleanup for.cond.cleanup: ; preds = %middle.block, %entry @@ -134,7 +134,7 @@ declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) - declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) + declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare <16 x i1> @llvm.arm.vctp8(i32) declare void @llvm.stackprotector(i8*, i8**) declare <8 x i1> @llvm.arm.vctp16(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir index fc0aa2020df96..fa7304ebe6ba5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir @@ -46,7 +46,7 @@ %.lcssa = phi <8 x i16> [ %15, %vector.body ] %18 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %7) %19 = select <8 x i1> %18, <8 x i16> %.lcssa, <8 x i16> %vec.phi.lcssa - %20 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %19) + %20 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %19) br label %for.cond.cleanup for.cond.cleanup: ; preds = %middle.block, %entry @@ -54,7 +54,7 @@ ret i16 %a.0.lcssa } declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) - declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) + declare i16 
@llvm.vector.reduce.add.v8i16(<8 x i16>) declare void @llvm.set.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <8 x i1> @llvm.arm.mve.vctp16(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir index d91556e3e70b9..7ef303a1a9499 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir @@ -52,7 +52,7 @@ %n.splat = shufflevector <4 x i32> %insert.n, <4 x i32> undef, <4 x i32> zeroinitializer %tmp16 = icmp ult <4 x i32> %idx.splat, %n.splat %tmp17 = select <4 x i1> %tmp16, <4 x i32> %tmp13, <4 x i32> %vec.phi - %tmp18 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp17) + %tmp18 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp17) br label %for.cond.cleanup for.cond.cleanup: ; preds = %middle.block, %entry @@ -60,7 +60,7 @@ ret i32 %res.0.lcssa } declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 - declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2 + declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2 declare void @llvm.set.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir index 337816146e5f0..00abf1603fb6f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir @@ -45,7 +45,7 @@ middle.block: ; preds = %vector.body %15 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %8) %16 = select <4 x i1> %15, <4 x i32> %12, <4 x i32> %vec.phi - %17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %16) + %17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %16) br label %for.cond.cleanup for.cond.cleanup: ; preds = %middle.block, %entry @@ -53,7 +53,7 @@ ret i32 %res.0.lcssa } declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 - declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2 + declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2 declare void @llvm.set.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll index d158c85e401b8..e06ec427599df 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -572,7 +572,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %10, label %middle.block, label %vector.body, !llvm.loop !7 middle.block: ; preds = %vector.body - %11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %9) + %11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %9) ;for.cond8.for.cond.cleanup10_crit_edge.us.us: ; preds = %for.body11.us.us, %middle.block %add19.us.us = add i32 %j.051.us.us, %mul18.us %arrayidx20.us.us = getelementptr inbounds i32, i32* %C, i32 %add19.us.us @@ -803,7 +803,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %12, label %middle.block, label %vector.body, !llvm.loop !7 
middle.block: ; preds = %vector.body - %13 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %11) + %13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %11) br i1 %cmp.n, label %for.cond5.for.cond.cleanup7_crit_edge.us.us, label %for.body8.us.us.preheader for.cond5.for.cond.cleanup7_crit_edge.us.us: ; preds = %for.body8.us.us, %middle.block @@ -1065,7 +1065,7 @@ vector.body: ; preds = %vector.body, %vecto %wide.masked.gather75 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %tmp85, i32 1, <4 x i1> , <4 x i8> undef) %tmp86 = sext <4 x i8> %wide.masked.gather75 to <4 x i32> %tmp87 = mul nsw <4 x i32> %tmp84, %tmp86 - %tmp88 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp87) + %tmp88 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp87) %tmp89 = add i32 %tmp88, %vec.phi %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, @@ -1091,7 +1091,7 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, < declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>) declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>) #3 -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare void @llvm.memset.p0i8.i32(i8* align 2, i8, i32, i1) declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll index a4a67512b7199..5c32f37455ec6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll @@ -62,7 +62,7 @@ vector.body: ; preds = %vector.body, %entry br i1 %8, label %middle.block, label %vector.body middle.block: ; preds = %vector.body %9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi - %10 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %9) + %10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %9) store i32 %10, i32* %arrayidx.us.us, align 4 %inc21.us.us = add nuw i32 4, 1 %exitcond81.not = icmp eq i32 %inc21.us.us, %n @@ -139,7 +139,7 @@ vector.body: ; preds = %vector.body, %entry br i1 %8, label %middle.block, label %vector.body middle.block: ; preds = %vector.body %9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi - %10 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %9) + %10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %9) store i32 %10, i32* %arrayidx.us.us, align 4 %inc21.us.us = add nuw i32 4, 1 %exitcond81.not = icmp eq i32 %inc21.us.us, %n @@ -210,7 +210,7 @@ vector.body: ; preds = %vector.body, %entry br i1 %8, label %middle.block, label %vector.body middle.block: ; preds = %vector.body %9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi - %10 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %9) + %10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %9) store i32 %10, i32* %arrayidx.us.us, align 4 %inc21.us.us = add nuw i32 4, 1 %exitcond81.not = icmp eq i32 %inc21.us.us, %n @@ -440,7 +440,7 @@ for.cond.cleanup: ; preds = %vector.body, %for.b ret void } -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, 
<4 x i1>, <4 x i8>) declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index d67ccd9393cc4..b710912e808d2 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -1390,7 +1390,7 @@ declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>) -declare i32 @llvm.experimental.vector.reduce.add.v16i8(<16 x i32> %ext4) +declare i32 @llvm.vector.reduce.add.v16i8(<16 x i32> %ext4) declare i32 @llvm.arm.mve.vmldava.v8i16(i32, i32, i32, i32, <8 x i16>, <8 x i16>) declare i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32, i32, i32, i32, <16 x i8>, <16 x i8>, <16 x i1>) declare i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32, i32, i32, i32, <8 x i16>, <8 x i16>, <8 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-vaddv.ll b/llvm/test/CodeGen/Thumb2/mve-vaddv.ll index e3f236bedc04f..d4a04567f099f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vaddv.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vaddv.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s -declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>) +declare i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64>) +declare i32 @llvm.vector.reduce.add.i32.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.i32.v8i32(<8 x i32>) +declare i16 @llvm.vector.reduce.add.i16.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.add.i16.v16i16(<16 x i16>) +declare i8 @llvm.vector.reduce.add.i8.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8>) define arm_aapcs_vfpcc i64 @vaddv_v2i64_i64(<2 x i64> %s1) { ; CHECK-LABEL: vaddv_v2i64_i64: @@ -20,7 +20,7 @@ define arm_aapcs_vfpcc i64 @vaddv_v2i64_i64(<2 x i64> %s1) { ; CHECK-NEXT: adcs r1, r2 ; CHECK-NEXT: bx lr entry: - %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %s1) + %r = call i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64> %s1) ret i64 %r } @@ -30,7 +30,7 @@ define arm_aapcs_vfpcc i32 @vaddv_v4i32_i32(<4 x i32> %s1) { ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: bx lr entry: - %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %s1) + %r = call i32 @llvm.vector.reduce.add.i32.v4i32(<4 x i32> %s1) ret i32 %r } @@ -41,7 +41,7 @@ define arm_aapcs_vfpcc i32 @vaddv_v8i32_i32(<8 x i32> %s1) { ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: bx lr entry: - %r = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %s1) + %r = call i32 @llvm.vector.reduce.add.i32.v8i32(<8 x i32> %s1) ret i32 %r } @@ -51,7 +51,7 @@ define arm_aapcs_vfpcc i16 @vaddv_v8i16_i16(<8 x i16> %s1) { ; CHECK-NEXT: vaddv.u16 r0, q0 ; CHECK-NEXT: bx lr entry: - %r 
= call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.add.i16.v8i16(<8 x i16> %s1) ret i16 %r } @@ -62,7 +62,7 @@ define arm_aapcs_vfpcc i16 @vaddv_v16i16_i16(<16 x i16> %s1) { ; CHECK-NEXT: vaddv.u16 r0, q0 ; CHECK-NEXT: bx lr entry: - %r = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> %s1) + %r = call i16 @llvm.vector.reduce.add.i16.v16i16(<16 x i16> %s1) ret i16 %r } @@ -72,7 +72,7 @@ define arm_aapcs_vfpcc i8 @vaddv_v16i8_i8(<16 x i8> %s1) { ; CHECK-NEXT: vaddv.u8 r0, q0 ; CHECK-NEXT: bx lr entry: - %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.add.i8.v16i8(<16 x i8> %s1) ret i8 %r } @@ -83,7 +83,7 @@ define arm_aapcs_vfpcc i8 @vaddv_v32i8_i8(<32 x i8> %s1) { ; CHECK-NEXT: vaddv.u8 r0, q0 ; CHECK-NEXT: bx lr entry: - %r = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> %s1) + %r = call i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8> %s1) ret i8 %r } @@ -102,7 +102,7 @@ define arm_aapcs_vfpcc i64 @vaddva_v2i64_i64(<2 x i64> %s1, i64 %x) { ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r7, pc} entry: - %t = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %s1) + %t = call i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64> %s1) %r = add i64 %t, %x ret i64 %r } @@ -113,7 +113,7 @@ define arm_aapcs_vfpcc i32 @vaddva_v4i32_i32(<4 x i32> %s1, i32 %x) { ; CHECK-NEXT: vaddva.u32 r0, q0 ; CHECK-NEXT: bx lr entry: - %t = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %s1) + %t = call i32 @llvm.vector.reduce.add.i32.v4i32(<4 x i32> %s1) %r = add i32 %t, %x ret i32 %r } @@ -125,7 +125,7 @@ define arm_aapcs_vfpcc i32 @vaddva_v8i32_i32(<8 x i32> %s1, i32 %x) { ; CHECK-NEXT: vaddva.u32 r0, q0 ; CHECK-NEXT: bx lr entry: - %t = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %s1) + %t = call i32 @llvm.vector.reduce.add.i32.v8i32(<8 x i32> %s1) %r = add i32 %t, %x ret i32 %r } @@ -136,7 +136,7 @@ define arm_aapcs_vfpcc i16 @vaddva_v8i16_i16(<8 x i16> %s1, i16 %x) { ; CHECK-NEXT: vaddva.u16 r0, q0 ; CHECK-NEXT: bx lr entry: - %t = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %s1) + %t = call i16 @llvm.vector.reduce.add.i16.v8i16(<8 x i16> %s1) %r = add i16 %t, %x ret i16 %r } @@ -148,7 +148,7 @@ define arm_aapcs_vfpcc i16 @vaddva_v16i16_i16(<16 x i16> %s1, i16 %x) { ; CHECK-NEXT: vaddva.u16 r0, q0 ; CHECK-NEXT: bx lr entry: - %t = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> %s1) + %t = call i16 @llvm.vector.reduce.add.i16.v16i16(<16 x i16> %s1) %r = add i16 %t, %x ret i16 %r } @@ -159,7 +159,7 @@ define arm_aapcs_vfpcc i8 @vaddva_v16i8_i8(<16 x i8> %s1, i8 %x) { ; CHECK-NEXT: vaddva.u8 r0, q0 ; CHECK-NEXT: bx lr entry: - %t = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %s1) + %t = call i8 @llvm.vector.reduce.add.i8.v16i8(<16 x i8> %s1) %r = add i8 %t, %x ret i8 %r } @@ -171,7 +171,7 @@ define arm_aapcs_vfpcc i8 @vaddva_v32i8_i8(<32 x i8> %s1, i8 %x) { ; CHECK-NEXT: vaddva.u8 r0, q0 ; CHECK-NEXT: bx lr entry: - %t = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> %s1) + %t = call i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8> %s1) %r = add i8 %t, %x ret i8 %r } diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll index 35eecabb75ff9..df2cb4361f2ff 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll @@ -7,7 +7,7 @@ 
define arm_aapcs_vfpcc i32 @add_v4i32_v4i32(<4 x i32> %x) { ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: bx lr entry: - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %x) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x) ret i32 %z } @@ -18,7 +18,7 @@ define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_zext(<4 x i32> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <4 x i32> %x to <4 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) ret i64 %z } @@ -29,7 +29,7 @@ define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_sext(<4 x i32> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <4 x i32> %x to <4 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) ret i64 %z } @@ -47,7 +47,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <2 x i32> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) ret i64 %z } @@ -65,7 +65,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_sext(<2 x i32> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <2 x i32> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) ret i64 %z } @@ -76,7 +76,7 @@ define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_zext(<8 x i16> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <8 x i16> %x to <8 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) ret i32 %z } @@ -87,7 +87,7 @@ define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_sext(<8 x i16> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <8 x i16> %x to <8 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) ret i32 %z } @@ -99,7 +99,7 @@ define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_zext(<4 x i16> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <4 x i16> %x to <4 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) ret i32 %z } @@ -111,7 +111,7 @@ define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_sext(<4 x i16> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <4 x i16> %x to <4 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) ret i32 %z } @@ -122,7 +122,7 @@ define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16(<8 x i16> %x) { ; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: bx lr entry: - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %x) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x) ret i16 %z } @@ -175,7 +175,7 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <8 x i16> %x to <8 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) ret i64 %z } @@ -242,7 +242,7 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <8 x i16> %x to <8 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) ret i64 %z } @@ -258,7 +258,7 @@ define arm_aapcs_vfpcc i64 
@add_v2i16_v2i64_zext(<2 x i16> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <2 x i16> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) ret i64 %z } @@ -278,7 +278,7 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_sext(<2 x i16> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <2 x i16> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) ret i64 %z } @@ -289,7 +289,7 @@ define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_zext(<16 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <16 x i8> %x to <16 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx) ret i32 %z } @@ -300,7 +300,7 @@ define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_sext(<16 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <16 x i8> %x to <16 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx) ret i32 %z } @@ -313,7 +313,7 @@ define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_zext(<4 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <4 x i8> %x to <4 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) ret i32 %z } @@ -326,7 +326,7 @@ define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_sext(<4 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <4 x i8> %x to <4 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) ret i32 %z } @@ -338,7 +338,7 @@ define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <16 x i8> %x to <16 x i16> - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %xx) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx) ret i16 %z } @@ -350,7 +350,7 @@ define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <16 x i8> %x to <16 x i16> - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %xx) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx) ret i16 %z } @@ -363,7 +363,7 @@ define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <8 x i8> %x to <8 x i16> - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %xx) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx) ret i16 %z } @@ -376,7 +376,7 @@ define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <8 x i8> %x to <8 x i16> - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %xx) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx) ret i16 %z } @@ -387,7 +387,7 @@ define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8(<16 x i8> %x) { ; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: bx lr entry: - %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %x) + %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x) ret i8 %z } @@ -492,7 +492,7 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <16 x i8> %x to <16 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx) ret i64 %z } 
@@ -627,7 +627,7 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <16 x i8> %x to <16 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx) ret i64 %z } @@ -643,7 +643,7 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = zext <2 x i8> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) ret i64 %z } @@ -663,7 +663,7 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_sext(<2 x i8> %x) { ; CHECK-NEXT: bx lr entry: %xx = sext <2 x i8> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) ret i64 %z } @@ -678,7 +678,7 @@ define arm_aapcs_vfpcc i64 @add_v2i64_v2i64(<2 x i64> %x) { ; CHECK-NEXT: adcs r1, r2 ; CHECK-NEXT: bx lr entry: - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %x) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x) ret i64 %z } @@ -688,7 +688,7 @@ define arm_aapcs_vfpcc i32 @add_v4i32_v4i32_acc(<4 x i32> %x, i32 %a) { ; CHECK-NEXT: vaddva.u32 r0, q0 ; CHECK-NEXT: bx lr entry: - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %x) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x) %r = add i32 %z, %a ret i32 %r } @@ -700,7 +700,7 @@ define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, i64 %a) { ; CHECK-NEXT: bx lr entry: %xx = zext <4 x i32> %x to <4 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -712,7 +712,7 @@ define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, i64 %a) { ; CHECK-NEXT: bx lr entry: %xx = sext <4 x i32> %x to <4 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -735,7 +735,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, i64 %a) { ; CHECK-NEXT: pop {r7, pc} entry: %xx = zext <2 x i32> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -756,7 +756,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, i64 %a) { ; CHECK-NEXT: bx lr entry: %xx = sext <2 x i32> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -768,7 +768,7 @@ define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, i32 %a) { ; CHECK-NEXT: bx lr entry: %xx = zext <8 x i16> %x to <8 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) %r = add i32 %z, %a ret i32 %r } @@ -780,7 +780,7 @@ define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, i32 %a) { ; CHECK-NEXT: bx lr entry: %xx = sext <8 x i16> %x to <8 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) %r = add i32 %z, %a ret i32 %r } @@ -793,7 +793,7 @@ define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, 
i32 %a) { ; CHECK-NEXT: bx lr entry: %xx = zext <4 x i16> %x to <4 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) %r = add i32 %z, %a ret i32 %r } @@ -806,7 +806,7 @@ define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, i32 %a) { ; CHECK-NEXT: bx lr entry: %xx = sext <4 x i16> %x to <4 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) %r = add i32 %z, %a ret i32 %r } @@ -818,7 +818,7 @@ define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, i16 %a) { ; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: bx lr entry: - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %x) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x) %r = add i16 %z, %a ret i16 %r } @@ -876,7 +876,7 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) { ; CHECK-NEXT: pop {r4, pc} entry: %xx = zext <8 x i16> %x to <8 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -948,7 +948,7 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) { ; CHECK-NEXT: pop {r4, pc} entry: %xx = sext <8 x i16> %x to <8 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -967,7 +967,7 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) { ; CHECK-NEXT: bx lr entry: %xx = zext <2 x i16> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -990,7 +990,7 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, i64 %a) { ; CHECK-NEXT: bx lr entry: %xx = sext <2 x i16> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -1002,7 +1002,7 @@ define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, i32 %a) { ; CHECK-NEXT: bx lr entry: %xx = zext <16 x i8> %x to <16 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx) %r = add i32 %z, %a ret i32 %r } @@ -1014,7 +1014,7 @@ define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, i32 %a) { ; CHECK-NEXT: bx lr entry: %xx = sext <16 x i8> %x to <16 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx) %r = add i32 %z, %a ret i32 %r } @@ -1028,7 +1028,7 @@ define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, i32 %a) { ; CHECK-NEXT: bx lr entry: %xx = zext <4 x i8> %x to <4 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) %r = add i32 %z, %a ret i32 %r } @@ -1042,7 +1042,7 @@ define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, i32 %a) { ; CHECK-NEXT: bx lr entry: %xx = sext <4 x i8> %x to <4 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) %r = add i32 %z, %a ret i32 %r } @@ 
-1055,7 +1055,7 @@ define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, i16 ; CHECK-NEXT: bx lr entry: %xx = zext <16 x i8> %x to <16 x i16> - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %xx) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx) %r = add i16 %z, %a ret i16 %r } @@ -1068,7 +1068,7 @@ define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 ; CHECK-NEXT: bx lr entry: %xx = sext <16 x i8> %x to <16 x i16> - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %xx) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx) %r = add i16 %z, %a ret i16 %r } @@ -1082,7 +1082,7 @@ define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, i16 %a) ; CHECK-NEXT: bx lr entry: %xx = zext <8 x i8> %x to <8 x i16> - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %xx) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx) %r = add i16 %z, %a ret i16 %r } @@ -1096,7 +1096,7 @@ define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, i16 %a) ; CHECK-NEXT: bx lr entry: %xx = sext <8 x i8> %x to <8 x i16> - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %xx) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx) %r = add i16 %z, %a ret i16 %r } @@ -1108,7 +1108,7 @@ define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, i8 %a) { ; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: bx lr entry: - %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %x) + %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x) %r = add i8 %z, %a ret i8 %r } @@ -1218,7 +1218,7 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) { ; CHECK-NEXT: pop {r4, pc} entry: %xx = zext <16 x i8> %x to <16 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -1358,7 +1358,7 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) { ; CHECK-NEXT: pop {r4, pc} entry: %xx = sext <16 x i8> %x to <16 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -1377,7 +1377,7 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) { ; CHECK-NEXT: bx lr entry: %xx = zext <2 x i8> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -1400,7 +1400,7 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, i64 %a) { ; CHECK-NEXT: bx lr entry: %xx = sext <2 x i8> %x to <2 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) %r = add i64 %z, %a ret i64 %r } @@ -1420,18 +1420,18 @@ define arm_aapcs_vfpcc i64 @add_v2i64_v2i64_acc(<2 x i64> %x, i64 %a) { ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r7, pc} entry: - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %x) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x) %r = add i64 %z, %a ret i64 %r } -declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) -declare i32 
@llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) -declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) -declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll index 0f3aacf11d8cf..e59fb0bb1ef49 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll @@ -10,7 +10,7 @@ define arm_aapcs_vfpcc i32 @add_v4i32_v4i32(<4 x i32> %x, <4 x i32> %b) { entry: %c = icmp eq <4 x i32> %b, zeroinitializer %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) ret i32 %z } @@ -24,7 +24,7 @@ entry: %c = icmp eq <4 x i32> %b, zeroinitializer %xx = zext <4 x i32> %x to <4 x i64> %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s) ret i64 %z } @@ -38,7 +38,7 @@ entry: %c = icmp eq <4 x i32> %b, zeroinitializer %xx = sext <4 x i32> %x to <4 x i64> %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s) ret i64 %z } @@ -73,7 +73,7 @@ entry: %c = icmp eq <2 x i32> %b, zeroinitializer %xx = zext <2 x i32> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -114,7 +114,7 @@ entry: %c = icmp eq <2 x i32> %b, zeroinitializer %xx = sext <2 x i32> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -128,7 +128,7 @@ entry: %c = icmp eq <8 x i16> %b, zeroinitializer %xx = zext <8 x i16> %x to <8 x i32> %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s) ret i32 %z } @@ -142,7 +142,7 @@ entry: %c = icmp eq <8 x i16> %b, zeroinitializer %xx = sext <8 x i16> %x to <8 x i32> %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s) ret i32 %z } @@ -158,7 +158,7 @@ 
entry: %c = icmp eq <4 x i16> %b, zeroinitializer %xx = zext <4 x i16> %x to <4 x i32> %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) ret i32 %z } @@ -174,7 +174,7 @@ entry: %c = icmp eq <4 x i16> %b, zeroinitializer %xx = sext <4 x i16> %x to <4 x i32> %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) ret i32 %z } @@ -188,7 +188,7 @@ define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16(<8 x i16> %x, <8 x i16> %b) entry: %c = icmp eq <8 x i16> %b, zeroinitializer %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) ret i16 %z } @@ -314,7 +314,7 @@ entry: %c = icmp eq <8 x i16> %b, zeroinitializer %xx = zext <8 x i16> %x to <8 x i64> %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) ret i64 %z } @@ -456,7 +456,7 @@ entry: %c = icmp eq <8 x i16> %b, zeroinitializer %xx = sext <8 x i16> %x to <8 x i64> %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) ret i64 %z } @@ -492,7 +492,7 @@ entry: %c = icmp eq <2 x i16> %b, zeroinitializer %xx = zext <2 x i16> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -537,7 +537,7 @@ entry: %c = icmp eq <2 x i16> %b, zeroinitializer %xx = sext <2 x i16> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -551,7 +551,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = zext <16 x i8> %x to <16 x i32> %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) ret i32 %z } @@ -565,7 +565,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = sext <16 x i8> %x to <16 x i32> %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) ret i32 %z } @@ -582,7 +582,7 @@ entry: %c = icmp eq <4 x i8> %b, zeroinitializer %xx = zext <4 x i8> %x to <4 x i32> %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) ret i32 %z } @@ -600,7 +600,7 @@ entry: %c = icmp eq <4 x i8> %b, zeroinitializer %xx = sext <4 x i8> %x to <4 x i32> %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) ret i32 %z } @@ -615,7 
+615,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = zext <16 x i8> %x to <16 x i16> %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s) ret i16 %z } @@ -630,7 +630,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = sext <16 x i8> %x to <16 x i16> %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s) ret i16 %z } @@ -647,7 +647,7 @@ entry: %c = icmp eq <8 x i8> %b, zeroinitializer %xx = zext <8 x i8> %x to <8 x i16> %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) ret i16 %z } @@ -664,7 +664,7 @@ entry: %c = icmp eq <8 x i8> %b, zeroinitializer %xx = sext <8 x i8> %x to <8 x i16> %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) ret i16 %z } @@ -678,7 +678,7 @@ define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8(<16 x i8> %x, <16 x i8> %b) { entry: %c = icmp eq <16 x i8> %b, zeroinitializer %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer - %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %s) + %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s) ret i8 %z } @@ -948,7 +948,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = zext <16 x i8> %x to <16 x i64> %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) ret i64 %z } @@ -1257,7 +1257,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = sext <16 x i8> %x to <16 x i64> %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) ret i64 %z } @@ -1293,7 +1293,7 @@ entry: %c = icmp eq <2 x i8> %b, zeroinitializer %xx = zext <2 x i8> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -1338,7 +1338,7 @@ entry: %c = icmp eq <2 x i8> %b, zeroinitializer %xx = sext <2 x i8> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -1372,7 +1372,7 @@ define arm_aapcs_vfpcc i64 @add_v2i64_v2i64(<2 x i64> %x, <2 x i64> %b) { entry: %c = icmp eq <2 x i64> %b, zeroinitializer %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -1385,7 +1385,7 @@ define arm_aapcs_vfpcc i32 @add_v4i32_v4i32_acc(<4 x i32> %x, <4 x i32> %b, i32 entry: %c = icmp eq <4 x i32> %b, zeroinitializer %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer - %z = call i32 
@llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1400,7 +1400,7 @@ entry: %c = icmp eq <4 x i32> %b, zeroinitializer %xx = zext <4 x i32> %x to <4 x i64> %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1415,7 +1415,7 @@ entry: %c = icmp eq <4 x i32> %b, zeroinitializer %xx = sext <4 x i32> %x to <4 x i64> %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1455,7 +1455,7 @@ entry: %c = icmp eq <2 x i32> %b, zeroinitializer %xx = zext <2 x i32> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1501,7 +1501,7 @@ entry: %c = icmp eq <2 x i32> %b, zeroinitializer %xx = sext <2 x i32> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1516,7 +1516,7 @@ entry: %c = icmp eq <8 x i16> %b, zeroinitializer %xx = zext <8 x i16> %x to <8 x i32> %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1531,7 +1531,7 @@ entry: %c = icmp eq <8 x i16> %b, zeroinitializer %xx = sext <8 x i16> %x to <8 x i32> %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1548,7 +1548,7 @@ entry: %c = icmp eq <4 x i16> %b, zeroinitializer %xx = zext <4 x i16> %x to <4 x i32> %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1565,7 +1565,7 @@ entry: %c = icmp eq <4 x i16> %b, zeroinitializer %xx = sext <4 x i16> %x to <4 x i32> %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1580,7 +1580,7 @@ define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, <8 x i16> entry: %c = icmp eq <8 x i16> %b, zeroinitializer %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) %r = add i16 %z, %a ret i16 %r } @@ -1711,7 +1711,7 @@ entry: %c = icmp eq <8 x i16> %b, zeroinitializer %xx = zext <8 x i16> %x to <8 x i64> %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 
@llvm.vector.reduce.add.v8i64(<8 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1858,7 +1858,7 @@ entry: %c = icmp eq <8 x i16> %b, zeroinitializer %xx = sext <8 x i16> %x to <8 x i64> %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1897,7 +1897,7 @@ entry: %c = icmp eq <2 x i16> %b, zeroinitializer %xx = zext <2 x i16> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1947,7 +1947,7 @@ entry: %c = icmp eq <2 x i16> %b, zeroinitializer %xx = sext <2 x i16> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1962,7 +1962,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = zext <16 x i8> %x to <16 x i32> %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1977,7 +1977,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = sext <16 x i8> %x to <16 x i32> %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1995,7 +1995,7 @@ entry: %c = icmp eq <4 x i8> %b, zeroinitializer %xx = zext <4 x i8> %x to <4 x i32> %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -2014,7 +2014,7 @@ entry: %c = icmp eq <4 x i8> %b, zeroinitializer %xx = sext <4 x i8> %x to <4 x i32> %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -2030,7 +2030,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = zext <16 x i8> %x to <16 x i16> %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s) %r = add i16 %z, %a ret i16 %r } @@ -2046,7 +2046,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = sext <16 x i8> %x to <16 x i16> %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s) %r = add i16 %z, %a ret i16 %r } @@ -2064,7 +2064,7 @@ entry: %c = icmp eq <8 x i8> %b, zeroinitializer %xx = zext <8 x i8> %x to <8 x i16> %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) %r = add i16 %z, %a ret i16 %r } @@ -2082,7 +2082,7 @@ entry: %c = icmp eq 
<8 x i8> %b, zeroinitializer %xx = sext <8 x i8> %x to <8 x i16> %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) %r = add i16 %z, %a ret i16 %r } @@ -2097,7 +2097,7 @@ define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, <16 x i8> % entry: %c = icmp eq <16 x i8> %b, zeroinitializer %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer - %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %s) + %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s) %r = add i8 %z, %a ret i8 %r } @@ -2372,7 +2372,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = zext <16 x i8> %x to <16 x i64> %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -2686,7 +2686,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %xx = sext <16 x i8> %x to <16 x i64> %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -2725,7 +2725,7 @@ entry: %c = icmp eq <2 x i8> %b, zeroinitializer %xx = zext <2 x i8> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -2775,7 +2775,7 @@ entry: %c = icmp eq <2 x i8> %b, zeroinitializer %xx = sext <2 x i8> %x to <2 x i64> %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -2814,18 +2814,18 @@ define arm_aapcs_vfpcc i64 @add_v2i64_v2i64_acc(<2 x i64> %x, <2 x i64> %b, i64 entry: %c = icmp eq <2 x i64> %b, zeroinitializer %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } -declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) -declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) -declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) 
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-bit.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-bit.ll
index fc06181978b70..cf9c2b6b39f1c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-bit.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-bit.ll
@@ -9,7 +9,7 @@ define arm_aapcs_vfpcc i32 @and_v2i32(<2 x i32> %x) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
   ret i32 %z
 }
@@ -25,7 +25,7 @@ define arm_aapcs_vfpcc i32 @and_v4i32(<4 x i32> %x) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %x)
   ret i32 %z
 }
@@ -42,7 +42,7 @@ define arm_aapcs_vfpcc i32 @and_v8i32(<8 x i32> %x) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %x)
   ret i32 %z
 }
@@ -58,7 +58,7 @@ define arm_aapcs_vfpcc i16 @and_v4i16(<4 x i16> %x) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %x)
   ret i16 %z
 }
@@ -76,7 +76,7 @@ define arm_aapcs_vfpcc i16 @and_v8i16(<8 x i16> %x) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %x)
   ret i16 %z
 }
@@ -95,7 +95,7 @@ define arm_aapcs_vfpcc i16 @and_v16i16(<16 x i16> %x) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %x)
   ret i16 %z
 }
@@ -113,7 +113,7 @@ define arm_aapcs_vfpcc i8 @and_v8i8(<8 x i8> %x) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
   ret i8 %z
 }
@@ -133,7 +133,7 @@ define arm_aapcs_vfpcc i8 @and_v16i8(<16 x i8> %x) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %x)
   ret i8 %z
 }
@@ -154,7 +154,7 @@ define arm_aapcs_vfpcc i8 @and_v32i8(<32 x i8> %x) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %x)
   ret i8 %z
 }
@@ -163,7 +163,7 @@ define arm_aapcs_vfpcc i64 @and_v1i64(<1 x i64> %x) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %x)
   ret i64 %z
 }
@@ -178,7 +178,7 @@ define arm_aapcs_vfpcc i64 @and_v2i64(<2 x i64> %x) {
 ; CHECK-NEXT: ands r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
   ret i64 %z
 }
@@ -194,7 +194,7 @@ define arm_aapcs_vfpcc i64 @and_v4i64(<4 x i64> %x) {
 ; CHECK-NEXT: ands r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
   ret i64 %z
 }
@@ -207,7 +207,7 @@ define arm_aapcs_vfpcc i32 @and_v2i32_acc(<2 x i32> %x, i32 %y) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
   %r = and i32 %y, %z
   ret i32 %r
 }
@@ -225,7 +225,7 @@ define arm_aapcs_vfpcc i32 @and_v4i32_acc(<4 x i32> %x, i32 %y) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %x)
   %r = and i32 %y, %z
   ret i32 %r
 }
@@ -244,7 +244,7 @@ define arm_aapcs_vfpcc i32 @and_v8i32_acc(<8 x i32> %x, i32 %y) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %x)
   %r = and i32 %y, %z
   ret i32 %r
 }
@@ -262,7 +262,7 @@ define arm_aapcs_vfpcc i16 @and_v4i16_acc(<4 x i16> %x, i16 %y) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %x)
   %r = and i16 %y, %z
   ret i16 %r
 }
@@ -282,7 +282,7 @@ define arm_aapcs_vfpcc i16 @and_v8i16_acc(<8 x i16> %x, i16 %y) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %x)
   %r = and i16 %y, %z
   ret i16 %r
 }
@@ -303,7 +303,7 @@ define arm_aapcs_vfpcc i16 @and_v16i16_acc(<16 x i16> %x, i16 %y) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %x)
   %r = and i16 %y, %z
   ret i16 %r
 }
@@ -323,7 +323,7 @@ define arm_aapcs_vfpcc i8 @and_v8i8_acc(<8 x i8> %x, i8 %y) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
   %r = and i8 %y, %z
   ret i8 %r
 }
@@ -345,7 +345,7 @@ define arm_aapcs_vfpcc i8 @and_v16i8_acc(<16 x i8> %x, i8 %y) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %x)
   %r = and i8 %y, %z
   ret i8 %r
 }
@@ -368,7 +368,7 @@ define arm_aapcs_vfpcc i8 @and_v32i8_acc(<32 x i8> %x, i8 %y) {
 ; CHECK-NEXT: ands r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %x)
   %r = and i8 %y, %z
   ret i8 %r
 }
@@ -380,7 +380,7 @@ define arm_aapcs_vfpcc i64 @and_v1i64_acc(<1 x i64> %x, i64 %y) {
 ; CHECK-NEXT: ands r1, r3
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %x)
   %r = and i64 %y, %z
   ret i64 %r
 }
@@ -398,7 +398,7 @@ define arm_aapcs_vfpcc i64 @and_v2i64_acc(<2 x i64> %x, i64 %y) {
 ; CHECK-NEXT: ands r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
   %r = and i64 %y, %z
   ret i64 %r
 }
@@ -417,7 +417,7 @@ define arm_aapcs_vfpcc i64 @and_v4i64_acc(<4 x i64> %x, i64 %y) {
 ; CHECK-NEXT: ands r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
   %r = and i64 %y, %z
   ret i64 %r
 }
@@ -430,7 +430,7 @@ define arm_aapcs_vfpcc i32 @or_v2i32(<2 x i32> %x) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
   ret i32 %z
 }
@@ -446,7 +446,7 @@ define arm_aapcs_vfpcc i32 @or_v4i32(<4 x i32> %x) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %x)
   ret i32 %z
 }
@@ -463,7 +463,7 @@ define arm_aapcs_vfpcc i32 @or_v8i32(<8 x i32> %x) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %x)
   ret i32 %z
 }
@@ -479,7 +479,7 @@ define arm_aapcs_vfpcc i16 @or_v4i16(<4 x i16> %x) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %x)
   ret i16 %z
 }
@@ -497,7 +497,7 @@ define arm_aapcs_vfpcc i16 @or_v8i16(<8 x i16> %x) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %x)
   ret i16 %z
 }
@@ -516,7 +516,7 @@ define arm_aapcs_vfpcc i16 @or_v16i16(<16 x i16> %x) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %x)
   ret i16 %z
 }
@@ -534,7 +534,7 @@ define arm_aapcs_vfpcc i8 @or_v8i8(<8 x i8> %x) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %x)
   ret i8 %z
 }
@@ -554,7 +554,7 @@ define arm_aapcs_vfpcc i8 @or_v16i8(<16 x i8> %x) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %x)
   ret i8 %z
 }
@@ -575,7 +575,7 @@ define arm_aapcs_vfpcc i8 @or_v32i8(<32 x i8> %x) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %x)
   ret i8 %z
 }
@@ -584,7 +584,7 @@ define arm_aapcs_vfpcc i64 @or_v1i64(<1 x i64> %x) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %x)
   ret i64 %z
 }
@@ -599,7 +599,7 @@ define arm_aapcs_vfpcc i64 @or_v2i64(<2 x i64> %x) {
 ; CHECK-NEXT: orrs r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
   ret i64 %z
 }
@@ -615,7 +615,7 @@ define arm_aapcs_vfpcc i64 @or_v4i64(<4 x i64> %x) {
 ; CHECK-NEXT: orrs r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
   ret i64 %z
 }
@@ -628,7 +628,7 @@ define arm_aapcs_vfpcc i32 @or_v2i32_acc(<2 x i32> %x, i32 %y) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
   %r = or i32 %y, %z
   ret i32 %r
 }
@@ -646,7 +646,7 @@ define arm_aapcs_vfpcc i32 @or_v4i32_acc(<4 x i32> %x, i32 %y) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %x)
   %r = or i32 %y, %z
   ret i32 %r
 }
@@ -665,7 +665,7 @@ define arm_aapcs_vfpcc i32 @or_v8i32_acc(<8 x i32> %x, i32 %y) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %x)
   %r = or i32 %y, %z
   ret i32 %r
 }
@@ -683,7 +683,7 @@ define arm_aapcs_vfpcc i16 @or_v4i16_acc(<4 x i16> %x, i16 %y) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %x)
   %r = or i16 %y, %z
   ret i16 %r
 }
@@ -703,7 +703,7 @@ define arm_aapcs_vfpcc i16 @or_v8i16_acc(<8 x i16> %x, i16 %y) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %x)
   %r = or i16 %y, %z
   ret i16 %r
 }
@@ -724,7 +724,7 @@ define arm_aapcs_vfpcc i16 @or_v16i16_acc(<16 x i16> %x, i16 %y) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %x)
   %r = or i16 %y, %z
   ret i16 %r
 }
@@ -744,7 +744,7 @@ define arm_aapcs_vfpcc i8 @or_v8i8_acc(<8 x i8> %x, i8 %y) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %x)
   %r = or i8 %y, %z
   ret i8 %r
 }
@@ -766,7 +766,7 @@ define arm_aapcs_vfpcc i8 @or_v16i8_acc(<16 x i8> %x, i8 %y) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %x)
   %r = or i8 %y, %z
   ret i8 %r
 }
@@ -789,7 +789,7 @@ define arm_aapcs_vfpcc i8 @or_v32i8_acc(<32 x i8> %x, i8 %y) {
 ; CHECK-NEXT: orrs r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %x)
   %r = or i8 %y, %z
   ret i8 %r
 }
@@ -801,7 +801,7 @@ define arm_aapcs_vfpcc i64 @or_v1i64_acc(<1 x i64> %x, i64 %y) {
 ; CHECK-NEXT: orrs r1, r3
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %x)
   %r = or i64 %y, %z
   ret i64 %r
 }
@@ -819,7 +819,7 @@ define arm_aapcs_vfpcc i64 @or_v2i64_acc(<2 x i64> %x, i64 %y) {
 ; CHECK-NEXT: orrs r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
   %r = or i64 %y, %z
   ret i64 %r
 }
@@ -838,7 +838,7 @@ define arm_aapcs_vfpcc i64 @or_v4i64_acc(<4 x i64> %x, i64 %y) {
 ; CHECK-NEXT: orrs r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
   %r = or i64 %y, %z
   ret i64 %r
 }
@@ -851,7 +851,7 @@ define arm_aapcs_vfpcc i32 @xor_v2i32(<2 x i32> %x) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
   ret i32 %z
 }
@@ -867,7 +867,7 @@ define arm_aapcs_vfpcc i32 @xor_v4i32(<4 x i32> %x) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %x)
   ret i32 %z
 }
@@ -884,7 +884,7 @@ define arm_aapcs_vfpcc i32 @xor_v8i32(<8 x i32> %x) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %x)
   ret i32 %z
 }
@@ -900,7 +900,7 @@ define arm_aapcs_vfpcc i16 @xor_v4i16(<4 x i16> %x) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %x)
   ret i16 %z
 }
@@ -918,7 +918,7 @@ define arm_aapcs_vfpcc i16 @xor_v8i16(<8 x i16> %x) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %x)
   ret i16 %z
 }
@@ -937,7 +937,7 @@ define arm_aapcs_vfpcc i16 @xor_v16i16(<16 x i16> %x) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %x)
   ret i16 %z
 }
@@ -955,7 +955,7 @@ define arm_aapcs_vfpcc i8 @xor_v8i8(<8 x i8> %x) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %x)
   ret i8 %z
 }
@@ -975,7 +975,7 @@ define arm_aapcs_vfpcc i8 @xor_v16i8(<16 x i8> %x) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %x)
   ret i8 %z
 }
@@ -996,7 +996,7 @@ define arm_aapcs_vfpcc i8 @xor_v32i8(<32 x i8> %x) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %x)
   ret i8 %z
 }
@@ -1005,7 +1005,7 @@ define arm_aapcs_vfpcc i64 @xor_v1i64(<1 x i64> %x) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %x)
   ret i64 %z
 }
@@ -1020,7 +1020,7 @@ define arm_aapcs_vfpcc i64 @xor_v2i64(<2 x i64> %x) {
 ; CHECK-NEXT: eors r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
   ret i64 %z
 }
@@ -1036,7 +1036,7 @@ define arm_aapcs_vfpcc i64 @xor_v4i64(<4 x i64> %x) {
 ; CHECK-NEXT: eors r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
   ret i64 %z
 }
@@ -1049,7 +1049,7 @@ define arm_aapcs_vfpcc i32 @xor_v2i32_acc(<2 x i32> %x, i32 %y) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
   %r = xor i32 %y, %z
   ret i32 %r
 }
@@ -1067,7 +1067,7 @@ define arm_aapcs_vfpcc i32 @xor_v4i32_acc(<4 x i32> %x, i32 %y) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %x)
   %r = xor i32 %y, %z
   ret i32 %r
 }
@@ -1086,7 +1086,7 @@ define arm_aapcs_vfpcc i32 @xor_v8i32_acc(<8 x i32> %x, i32 %y) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %x)
   %r = xor i32 %y, %z
   ret i32 %r
 }
@@ -1104,7 +1104,7 @@ define arm_aapcs_vfpcc i16 @xor_v4i16_acc(<4 x i16> %x, i16 %y) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %x)
   %r = xor i16 %y, %z
   ret i16 %r
 }
@@ -1124,7 +1124,7 @@ define arm_aapcs_vfpcc i16 @xor_v8i16_acc(<8 x i16> %x, i16 %y) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %x)
   %r = xor i16 %y, %z
   ret i16 %r
 }
@@ -1145,7 +1145,7 @@ define arm_aapcs_vfpcc i16 @xor_v16i16_acc(<16 x i16> %x, i16 %y) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %x)
   %r = xor i16 %y, %z
   ret i16 %r
 }
@@ -1165,7 +1165,7 @@ define arm_aapcs_vfpcc i8 @xor_v8i8_acc(<8 x i8> %x, i8 %y) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %x)
   %r = xor i8 %y, %z
   ret i8 %r
 }
@@ -1187,7 +1187,7 @@ define arm_aapcs_vfpcc i8 @xor_v16i8_acc(<16 x i8> %x, i8 %y) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %x)
   %r = xor i8 %y, %z
   ret i8 %r
 }
@@ -1210,7 +1210,7 @@ define arm_aapcs_vfpcc i8 @xor_v32i8_acc(<32 x i8> %x, i8 %y) {
 ; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %x)
   %r = xor i8 %y, %z
   ret i8 %r
 }
@@ -1222,7 +1222,7 @@ define arm_aapcs_vfpcc i64 @xor_v1i64_acc(<1 x i64> %x, i64 %y) {
 ; CHECK-NEXT: eors r1, r3
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %x)
   %r = xor i64 %y, %z
   ret i64 %r
 }
@@ -1240,7 +1240,7 @@ define arm_aapcs_vfpcc i64 @xor_v2i64_acc(<2 x i64> %x, i64 %y) {
 ; CHECK-NEXT: eors r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
   %r = xor i64 %y, %z
   ret i64 %r
 }
@@ -1259,44 +1259,44 @@ define arm_aapcs_vfpcc i64 @xor_v4i64_acc(<4 x i64> %x, i64 %y) {
 ; CHECK-NEXT: eors r1, r2
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
   %r = xor i64 %y, %z
   ret i64 %r
 }
-declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>)
-declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>)
-declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>)
+declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
+declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll
index 77f0c77033f95..8ead4f5cfebfd 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll
@@ -9,7 +9,7 @@ define arm_aapcs_vfpcc float @fadd_v2f32(<2 x float> %x, float %y) {
 ; CHECK-NEXT: vadd.f32 s0, s4, s0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %y, <2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float %y, <2 x float> %x)
   ret float %z
 }
@@ -30,7 +30,7 @@ define arm_aapcs_vfpcc float @fadd_v4f32(<4 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vadd.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %y, <4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %y, <4 x float> %x)
   ret float %z
 }
@@ -56,7 +56,7 @@ define arm_aapcs_vfpcc float @fadd_v8f32(<8 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vadd.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %y, <8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %y, <8 x float> %x)
   ret float %z
 }
@@ -71,7 +71,7 @@ define arm_aapcs_vfpcc void @fadd_v2f16(<2 x half> %x, half* %yy) {
 ; CHECK-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v2f16(half %y, <2 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fadd.f16.v2f16(half %y, <2 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -102,7 +102,7 @@ define arm_aapcs_vfpcc void @fadd_v4f16(<4 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half %y, <4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fadd.f16.v4f16(half %y, <4 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -139,7 +139,7 @@ define arm_aapcs_vfpcc void @fadd_v8f16(<8 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half %y, <8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fadd.f16.v8f16(half %y, <8 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -189,7 +189,7 @@ define arm_aapcs_vfpcc void @fadd_v16f16(<16 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half %y, <16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fadd.f16.v16f16(half %y, <16 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -200,7 +200,7 @@ define arm_aapcs_vfpcc double @fadd_v1f64(<1 x double> %x, double %y) {
 ; CHECK-NEXT: vadd.f64 d0, d1, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double %y, <1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fadd.f64.v1f64(double %y, <1 x double> %x)
   ret double %z
 }
@@ -211,7 +211,7 @@ define arm_aapcs_vfpcc double @fadd_v2f64(<2 x double> %x, double %y) {
 ; CHECK-NEXT: vadd.f64 d0, d2, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double %y, <2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double %y, <2 x double> %x)
   ret double %z
 }
@@ -224,7 +224,7 @@ define arm_aapcs_vfpcc double @fadd_v4f64(<4 x double> %x, double %y) {
 ; CHECK-NEXT: vadd.f64 d0, d4, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double %y, <4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double %y, <4 x double> %x)
   ret double %z
 }
@@ -235,7 +235,7 @@ define arm_aapcs_vfpcc float @fadd_v2f32_nofast(<2 x float> %x, float %y) {
 ; CHECK-NEXT: vadd.f32 s0, s4, s1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %y, <2 x float> %x)
+  %z = call float @llvm.vector.reduce.fadd.f32.v2f32(float %y, <2 x float> %x)
   ret float %z
 }
@@ -248,7 +248,7 @@ define arm_aapcs_vfpcc float @fadd_v4f32_nofast(<4 x float> %x, float %y) {
 ; CHECK-NEXT: vadd.f32 s0, s4, s3
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %y, <4 x float> %x)
+  %z = call float @llvm.vector.reduce.fadd.f32.v4f32(float %y, <4 x float> %x)
   ret float %z
 }
@@ -265,7 +265,7 @@ define arm_aapcs_vfpcc float @fadd_v8f32_nofast(<8 x float> %x, float %y) {
 ; CHECK-NEXT: vadd.f32 s0, s0, s7
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %y, <8 x float> %x)
+  %z = call float @llvm.vector.reduce.fadd.f32.v8f32(float %y, <8 x float> %x)
   ret float %z
 }
@@ -283,7 +283,7 @@ define arm_aapcs_vfpcc void @fadd_v4f16_nofast(<4 x half> %x, half* %yy) {
 ; CHECK-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half %y, <4 x half> %x)
+  %z = call half @llvm.vector.reduce.fadd.f16.v4f16(half %y, <4 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -308,7 +308,7 @@ define arm_aapcs_vfpcc void @fadd_v8f16_nofast(<8 x half> %x, half* %yy) {
 ; CHECK-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half %y, <8 x half> %x)
+  %z = call half @llvm.vector.reduce.fadd.f16.v8f16(half %y, <8 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -345,7 +345,7 @@ define arm_aapcs_vfpcc void @fadd_v16f16_nofast(<16 x half> %x, half* %yy) {
 ; CHECK-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half %y, <16 x half> %x)
+  %z = call half @llvm.vector.reduce.fadd.f16.v16f16(half %y, <16 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -356,7 +356,7 @@ define arm_aapcs_vfpcc double @fadd_v1f64_nofast(<1 x double> %x, double %y) {
 ; CHECK-NEXT: vadd.f64 d0, d1, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double %y, <1 x double> %x)
+  %z = call double @llvm.vector.reduce.fadd.f64.v1f64(double %y, <1 x double> %x)
   ret double %z
 }
@@ -367,7 +367,7 @@ define arm_aapcs_vfpcc double @fadd_v2f64_nofast(<2 x double> %x, double %y) {
 ; CHECK-NEXT: vadd.f64 d0, d2, d1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double %y, <2 x double> %x)
+  %z = call double @llvm.vector.reduce.fadd.f64.v2f64(double %y, <2 x double> %x)
   ret double %z
 }
@@ -380,17 +380,17 @@ define arm_aapcs_vfpcc double @fadd_v4f64_nofast(<4 x double> %x, double %y) {
 ; CHECK-NEXT: vadd.f64 d0, d0, d3
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double %y, <4 x double> %x)
+  %z = call double @llvm.vector.reduce.fadd.f64.v4f64(double %y, <4 x double> %x)
   ret double %z
 }
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>)
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half, <16 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v2f16(half, <2 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half, <4 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half, <8 x half>)
+declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
+declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare half @llvm.vector.reduce.fadd.f16.v16f16(half, <16 x half>)
+declare half @llvm.vector.reduce.fadd.f16.v2f16(half, <2 x half>)
+declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
+declare half @llvm.vector.reduce.fadd.f16.v8f16(half, <8 x half>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
index a83fa6882cb90..45c6972312e3d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
@@ -8,7 +8,7 @@ define arm_aapcs_vfpcc float @fmin_v2f32(<2 x float> %x) {
 ; CHECK-NEXT: vminnm.f32 s0, s0, s1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
   ret float %z
 }
@@ -27,7 +27,7 @@ define arm_aapcs_vfpcc float @fmin_v4f32(<4 x float> %x) {
 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s4, s3
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
   ret float %z
 }
@@ -60,7 +60,7 @@ define arm_aapcs_vfpcc float @fmin_v8f32(<8 x float> %x) {
 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s2, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
   ret float %z
 }
@@ -83,7 +83,7 @@ define arm_aapcs_vfpcc half @fmin_v4f16(<4 x half> %x) {
 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   ret half %z
 }
@@ -112,7 +112,7 @@ define arm_aapcs_vfpcc half @fmin_v8f16(<8 x half> %x) {
 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   ret half %z
 }
@@ -170,7 +170,7 @@ define arm_aapcs_vfpcc half @fmin_v16f16(<16 x half> %x) {
 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   ret half %z
 }
@@ -179,7 +179,7 @@ define arm_aapcs_vfpcc double @fmin_v1f64(<1 x double> %x) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
   ret double %z
 }
@@ -189,7 +189,7 @@ define arm_aapcs_vfpcc double @fmin_v2f64(<2 x double> %x) {
 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
   ret double %z
 }
@@ -205,7 +205,7 @@ define arm_aapcs_vfpcc double @fmin_v4f64(<4 x double> %x) {
 ; CHECK-NEXT: vminnm.f64 d0, d0, d4
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
   ret double %z
 }
@@ -215,7 +215,7 @@ define arm_aapcs_vfpcc float @fmin_v2f32_nofast(<2 x float> %x) {
 ; CHECK-NEXT: vminnm.f32 s0, s0, s1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
   ret float %z
 }
@@ -234,7 +234,7 @@ define arm_aapcs_vfpcc float @fmin_v4f32_nofast(<4 x float> %x) {
 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s4, s3
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
   ret float %z
 }
@@ -258,7 +258,7 @@ define arm_aapcs_vfpcc float @fmin_v8f32_nofast(<8 x float> %x) {
 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
   ret float %z
 }
@@ -281,7 +281,7 @@ define arm_aapcs_vfpcc half @fmin_v4f16_nofast(<4 x half> %x) {
 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   ret half %z
 }
@@ -310,7 +310,7 @@ define arm_aapcs_vfpcc half @fmin_v8f16_nofast(<8 x half> %x) {
 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   ret half %z
 }
@@ -352,7 +352,7 @@ define arm_aapcs_vfpcc half @fmin_v16f16_nofast(<16 x half> %x) {
 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   ret half %z
 }
@@ -361,7 +361,7 @@ define arm_aapcs_vfpcc double @fmin_v1f64_nofast(<1 x double> %x) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
   ret double %z
 }
@@ -371,7 +371,7 @@ define arm_aapcs_vfpcc double @fmin_v2f64_nofast(<2 x double> %x) {
 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
   ret double %z
 }
@@ -383,7 +383,7 @@ define arm_aapcs_vfpcc double @fmin_v4f64_nofast(<4 x double> %x) {
 ; CHECK-NEXT: vminnm.f64 d0, d0, d4
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
   ret double %z
 }
@@ -394,7 +394,7 @@ define arm_aapcs_vfpcc float @fmin_v2f32_acc(<2 x float> %x, float %y) {
 ; CHECK-NEXT: vminnm.f32 s0, s4, s0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
   %c = fcmp fast olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -417,7 +417,7 @@ define arm_aapcs_vfpcc float @fmin_v4f32_acc(<4 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
   %c = fcmp fast olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -453,7 +453,7 @@ define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
   %c = fcmp fast olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -485,7 +485,7 @@ define arm_aapcs_vfpcc void @fmin_v4f16_acc(<4 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -503,7 +503,7 @@ define arm_aapcs_vfpcc void @fmin_v2f16_acc(<2 x half> %x, half* %yy) {
 ; CHECK-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v2f16(<2 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -542,7 +542,7 @@ define arm_aapcs_vfpcc void @fmin_v8f16_acc(<8 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -610,7 +610,7 @@ define arm_aapcs_vfpcc void @fmin_v16f16_acc(<16 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -623,7 +623,7 @@ define arm_aapcs_vfpcc double @fmin_v1f64_acc(<1 x double> %x, double %y) {
 ; CHECK-NEXT: vminnm.f64 d0, d1, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
   %c = fcmp fast olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -636,7 +636,7 @@ define arm_aapcs_vfpcc double @fmin_v2f64_acc(<2 x double> %x, double %y) {
 ; CHECK-NEXT: vminnm.f64 d0, d2, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
   %c = fcmp fast olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -655,7 +655,7 @@ define arm_aapcs_vfpcc double @fmin_v4f64_acc(<4 x double> %x, double %y) {
 ; CHECK-NEXT: vminnm.f64 d0, d4, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
   %c = fcmp fast olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -670,7 +670,7 @@ define arm_aapcs_vfpcc float @fmin_v2f32_acc_nofast(<2 x float> %x, float %y) {
 ; CHECK-NEXT: vselgt.f32 s0, s4, s0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
   %c = fcmp olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -697,7 +697,7 @@ define arm_aapcs_vfpcc float @fmin_v4f32_acc_nofast(<4 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
   %c = fcmp olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -729,7 +729,7 @@ define arm_aapcs_vfpcc float @fmin_v8f32_acc_nofast(<8 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
   %c = fcmp olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -765,7 +765,7 @@ define arm_aapcs_vfpcc void @fmin_v4f16_acc_nofast(<4 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   %c = fcmp olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -808,7 +808,7 @@ define arm_aapcs_vfpcc void @fmin_v8f16_acc_nofast(<8 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   %c = fcmp olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -864,7 +864,7 @@ define arm_aapcs_vfpcc void @fmin_v16f16_acc_nofast(<16 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   %c = fcmp olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -879,7 +879,7 @@ define arm_aapcs_vfpcc double @fmin_v1f64_acc_nofast(<1 x double> %x, double %y)
 ; CHECK-NEXT: vselgt.f64 d0, d1, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
   %c = fcmp olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -894,7 +894,7 @@ define arm_aapcs_vfpcc double @fmin_v2f64_acc_nofast(<2 x double> %x, double %y)
 ; CHECK-NEXT: vselgt.f64 d0, d2, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
   %c = fcmp olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -911,7 +911,7 @@ define arm_aapcs_vfpcc double @fmin_v4f64_acc_nofast(<4 x double> %x, double %y)
 ; CHECK-NEXT: vselgt.f64 d0, d4, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
   %c = fcmp olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -923,7 +923,7 @@ define arm_aapcs_vfpcc float @fmax_v2f32(<2 x float> %x) {
 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
   ret float %z
 }
@@ -942,7 +942,7 @@ define arm_aapcs_vfpcc float @fmax_v4f32(<4 x float> %x) {
 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s4, s3
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
   ret float %z
 }
@@ -974,7 +974,7 @@ define arm_aapcs_vfpcc float @fmax_v8f32(<8 x float> %x) {
 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s2, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
   ret float %z
 }
@@ -997,7 +997,7 @@ define arm_aapcs_vfpcc half @fmax_v4f16(<4 x half> %x) {
 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   ret half %z
 }
@@ -1026,7 +1026,7 @@ define arm_aapcs_vfpcc half @fmax_v8f16(<8 x half> %x) {
 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   ret half %z
 }
@@ -1084,7 +1084,7 @@ define arm_aapcs_vfpcc half @fmax_v16f16(<16 x half> %x) {
 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   ret half %z
 }
@@ -1093,7 +1093,7 @@ define arm_aapcs_vfpcc double @fmax_v1f64(<1 x double> %x) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
   ret double %z
 }
@@ -1103,7 +1103,7 @@ define arm_aapcs_vfpcc double @fmax_v2f64(<2 x double> %x) {
 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
   ret double %z
 }
@@ -1119,7 +1119,7 @@ define arm_aapcs_vfpcc double @fmax_v4f64(<4 x double> %x) {
 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d4
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
   ret double %z
 }
@@ -1129,7 +1129,7 @@ define arm_aapcs_vfpcc float @fmax_v2f32_nofast(<2 x float> %x) {
 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
   ret float %z
 }
@@ -1148,7 +1148,7 @@ define arm_aapcs_vfpcc float @fmax_v4f32_nofast(<4 x float> %x) {
 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s4, s3
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
   ret float %z
 }
@@ -1172,7 +1172,7 @@ define arm_aapcs_vfpcc float @fmax_v8f32_nofast(<8 x float> %x) {
 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
   ret float %z
 }
@@ -1195,7 +1195,7 @@ define arm_aapcs_vfpcc half @fmax_v4f16_nofast(<4 x half> %x) {
 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   ret half %z
 }
@@ -1224,7 +1224,7 @@ define arm_aapcs_vfpcc half @fmax_v8f16_nofast(<8 x half> %x) {
 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   ret half %z
 }
@@ -1266,7 +1266,7 @@ define arm_aapcs_vfpcc half @fmax_v16f16_nofast(<16 x half> %x) {
 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   ret half %z
 }
@@ -1275,7 +1275,7 @@ define arm_aapcs_vfpcc double @fmax_v1f64_nofast(<1 x double> %x) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
   ret double %z
 }
@@ -1285,7 +1285,7 @@ define arm_aapcs_vfpcc double @fmax_v2f64_nofast(<2 x double> %x) {
 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
   ret double %z
 }
@@ -1297,7 +1297,7 @@ define arm_aapcs_vfpcc double @fmax_v4f64_nofast(<4 x double> %x) {
 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d4
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
   ret double %z
 }
@@ -1308,7 +1308,7 @@ define arm_aapcs_vfpcc float @fmax_v2f32_acc(<2 x float> %x, float %y) {
 ; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
   %c = fcmp fast ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1331,7 +1331,7 @@ define arm_aapcs_vfpcc float @fmax_v4f32_acc(<4 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
   %c = fcmp fast ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1367,7 +1367,7 @@ define arm_aapcs_vfpcc float @fmax_v8f32_acc(<8 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
   %c = fcmp fast ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1384,7 +1384,7 @@ define arm_aapcs_vfpcc void @fmax_v2f16_acc(<2 x half> %x, half* %yy) {
 ; CHECK-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v2f16(<2 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1417,7 +1417,7 @@ define arm_aapcs_vfpcc void @fmax_v4f16_acc(<4 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1456,7 +1456,7 @@ define arm_aapcs_vfpcc void @fmax_v8f16_acc(<8 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1524,7 +1524,7 @@ define arm_aapcs_vfpcc void @fmax_v16f16_acc(<16 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1537,7 +1537,7 @@ define arm_aapcs_vfpcc double @fmax_v1f64_acc(<1 x double> %x, double %y) {
 ; CHECK-NEXT: vmaxnm.f64 d0, d1, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
   %c = fcmp fast ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -1550,7 +1550,7 @@ define arm_aapcs_vfpcc double @fmax_v2f64_acc(<2 x double> %x, double %y) {
 ; CHECK-NEXT: vmaxnm.f64 d0, d2, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
   %c = fcmp fast ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -1569,7 +1569,7 @@ define arm_aapcs_vfpcc double @fmax_v4f64_acc(<4 x double> %x, double %y) {
 ; CHECK-NEXT: vmaxnm.f64 d0, d4, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
   %c = fcmp fast ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -1584,7 +1584,7 @@ define arm_aapcs_vfpcc float @fmax_v2f32_acc_nofast(<2 x float> %x, float %y) {
 ; CHECK-NEXT: vselgt.f32 s0, s4, s0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
   %c = fcmp ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1611,7 +1611,7 @@ define arm_aapcs_vfpcc float @fmax_v4f32_acc_nofast(<4 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
   %c = fcmp ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1643,7 +1643,7 @@ define arm_aapcs_vfpcc float @fmax_v8f32_acc_nofast(<8 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
   %c = fcmp ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1679,7 +1679,7 @@ define arm_aapcs_vfpcc void @fmax_v4f16_acc_nofast(<4 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   %c = fcmp ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1722,7 +1722,7 @@ define arm_aapcs_vfpcc void @fmax_v8f16_acc_nofast(<8 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   %c = fcmp ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1778,7 +1778,7 @@ define arm_aapcs_vfpcc void @fmax_v16f16_acc_nofast(<16 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   %c = fcmp ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1793,7 +1793,7 @@ define arm_aapcs_vfpcc double @fmax_v1f64_acc_nofast(<1 x double> %x, double %y)
 ; CHECK-NEXT: vselgt.f64 d0, d1, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
   %c = fcmp ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -1808,7 +1808,7 @@ define arm_aapcs_vfpcc double @fmax_v2f64_acc_nofast(<2 x double> %x, double %y)
 ; CHECK-NEXT: vselgt.f64 d0, d2, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
   %c = fcmp ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -1825,29 +1825,29 @@ define arm_aapcs_vfpcc double @fmax_v4f64_acc_nofast(<4 x double> %x, double %y)
 ; CHECK-NEXT: vselgt.f64 d0, d4, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
   %c = fcmp ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
 }
-declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>)
-declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>)
-declare half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half>)
+declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
+declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
+declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
+declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
+declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
+declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
+declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
+declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
+declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
+declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll
index 89d1546932a4e..940e2e2f4a0c4 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll
@@ -9,7 +9,7 @@ define arm_aapcs_vfpcc float @fmul_v2f32(<2 x float> %x, float %y) {
 ; CHECK-NEXT: vmul.f32 s0, s4, s0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float %y, <2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float %y, <2 x float> %x)
   ret float %z
 }
@@ -30,7 +30,7 @@ define arm_aapcs_vfpcc float @fmul_v4f32(<4 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vmul.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %y, <4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %y, <4 x float> %x)
   ret float %z
 }
@@ -56,7 +56,7 @@ define arm_aapcs_vfpcc float @fmul_v8f32(<8 x float> %x, float %y) {
 ; CHECK-NOFP-NEXT: vmul.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT: bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float %y, <8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float %y, <8 x float> %x)
   ret float %z
 }
@@ -71,7 +71,7 @@ define arm_aapcs_vfpcc void @fmul_v2f16(<2 x half> %x, half* %yy) {
 ; CHECK-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v2f16(half %y, <2 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmul.f16.v2f16(half %y, <2 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -102,7 +102,7 @@ define arm_aapcs_vfpcc void @fmul_v4f16(<4 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half %y, <4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half %y, <4 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -139,7 +139,7 @@ define arm_aapcs_vfpcc void @fmul_v8f16(<8 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v8f16(half %y, <8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half %y, <8 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -189,7 +189,7 @@ define arm_aapcs_vfpcc void @fmul_v16f16(<16 x half> %x, half* %yy) {
 ; CHECK-NOFP-NEXT: bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v16f16(half %y, <16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmul.f16.v16f16(half %y, <16 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -200,7 +200,7 @@ define arm_aapcs_vfpcc double @fmul_v1f64(<1 x double> %x, double %y) {
 ; CHECK-NEXT: vmul.f64 d0, d1, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double %y, <1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmul.f64.v1f64(double %y, <1 x double> %x)
   ret double %z
 }
@@ -211,7 +211,7 @@ define arm_aapcs_vfpcc double @fmul_v2f64(<2 x double> %x, double %y) {
 ; CHECK-NEXT: vmul.f64 d0, d2, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double %y, <2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double %y, <2 x double> %x)
   ret double %z
 }
@@ -224,7 +224,7 @@ define arm_aapcs_vfpcc double @fmul_v4f64(<4 x double> %x, double %y) {
 ; CHECK-NEXT: vmul.f64 d0, d4, d0
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double %y, <4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double %y, <4 x double> %x)
   ret double %z
 }
@@ -235,7 +235,7 @@ define arm_aapcs_vfpcc float @fmul_v2f32_nofast(<2 x float> %x, float %y) {
 ; CHECK-NEXT: vmul.f32 s0, s4, s1
 ; CHECK-NEXT: bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float %y, <2 x float> %x)
+  %z = call float @llvm.vector.reduce.fmul.f32.v2f32(float %y, <2 x float> %x)
   ret float %z
 }
@@ -248,7 +248,7 @@ define arm_aapcs_vfpcc float @fmul_v4f32_nofast(<4 x float> %x, float %y) {
 ;
CHECK-NEXT: vmul.f32 s0, s4, s3 ; CHECK-NEXT: bx lr entry: - %z = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %y, <4 x float> %x) + %z = call float @llvm.vector.reduce.fmul.f32.v4f32(float %y, <4 x float> %x) ret float %z } @@ -265,7 +265,7 @@ define arm_aapcs_vfpcc float @fmul_v8f32_nofast(<8 x float> %x, float %y) { ; CHECK-NEXT: vmul.f32 s0, s0, s7 ; CHECK-NEXT: bx lr entry: - %z = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float %y, <8 x float> %x) + %z = call float @llvm.vector.reduce.fmul.f32.v8f32(float %y, <8 x float> %x) ret float %z } @@ -280,7 +280,7 @@ define arm_aapcs_vfpcc void @fmul_v2f16_nofast(<2 x half> %x, half* %yy) { ; CHECK-NEXT: bx lr entry: %y = load half, half* %yy - %z = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v2f16(half %y, <2 x half> %x) + %z = call half @llvm.vector.reduce.fmul.f16.v2f16(half %y, <2 x half> %x) store half %z, half* %yy ret void } @@ -299,7 +299,7 @@ define arm_aapcs_vfpcc void @fmul_v4f16_nofast(<4 x half> %x, half* %yy) { ; CHECK-NEXT: bx lr entry: %y = load half, half* %yy - %z = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half %y, <4 x half> %x) + %z = call half @llvm.vector.reduce.fmul.f16.v4f16(half %y, <4 x half> %x) store half %z, half* %yy ret void } @@ -324,7 +324,7 @@ define arm_aapcs_vfpcc void @fmul_v8f16_nofast(<8 x half> %x, half* %yy) { ; CHECK-NEXT: bx lr entry: %y = load half, half* %yy - %z = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v8f16(half %y, <8 x half> %x) + %z = call half @llvm.vector.reduce.fmul.f16.v8f16(half %y, <8 x half> %x) store half %z, half* %yy ret void } @@ -361,7 +361,7 @@ define arm_aapcs_vfpcc void @fmul_v16f16_nofast(<16 x half> %x, half* %yy) { ; CHECK-NEXT: bx lr entry: %y = load half, half* %yy - %z = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v16f16(half %y, <16 x half> %x) + %z = call half @llvm.vector.reduce.fmul.f16.v16f16(half %y, <16 x half> %x) store half %z, half* %yy ret void } @@ -372,7 +372,7 @@ define arm_aapcs_vfpcc double @fmul_v1f64_nofast(<1 x double> %x, double %y) { ; CHECK-NEXT: vmul.f64 d0, d1, d0 ; CHECK-NEXT: bx lr entry: - %z = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double %y, <1 x double> %x) + %z = call double @llvm.vector.reduce.fmul.f64.v1f64(double %y, <1 x double> %x) ret double %z } @@ -383,7 +383,7 @@ define arm_aapcs_vfpcc double @fmul_v2f64_nofast(<2 x double> %x, double %y) { ; CHECK-NEXT: vmul.f64 d0, d2, d1 ; CHECK-NEXT: bx lr entry: - %z = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double %y, <2 x double> %x) + %z = call double @llvm.vector.reduce.fmul.f64.v2f64(double %y, <2 x double> %x) ret double %z } @@ -396,17 +396,17 @@ define arm_aapcs_vfpcc double @fmul_v4f64_nofast(<4 x double> %x, double %y) { ; CHECK-NEXT: vmul.f64 d0, d0, d3 ; CHECK-NEXT: bx lr entry: - %z = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double %y, <4 x double> %x) + %z = call double @llvm.vector.reduce.fmul.f64.v4f64(double %y, <4 x double> %x) ret double %z } -declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double, <1 x double>) -declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double, <2 x double>) -declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double, <4 x double>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float, <2 x float>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>) -declare float 
@llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float, <8 x float>) -declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v16f16(half, <16 x half>) -declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v2f16(half, <2 x half>) -declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half, <4 x half>) -declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v8f16(half, <8 x half>) +declare double @llvm.vector.reduce.fmul.f64.v1f64(double, <1 x double>) +declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>) +declare double @llvm.vector.reduce.fmul.f64.v4f64(double, <4 x double>) +declare float @llvm.vector.reduce.fmul.f32.v2f32(float, <2 x float>) +declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fmul.f32.v8f32(float, <8 x float>) +declare half @llvm.vector.reduce.fmul.f16.v16f16(half, <16 x half>) +declare half @llvm.vector.reduce.fmul.f16.v2f16(half, <2 x half>) +declare half @llvm.vector.reduce.fmul.f16.v4f16(half, <4 x half>) +declare half @llvm.vector.reduce.fmul.f16.v8f16(half, <8 x half>) diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll index 286277945eb65..2544474932c93 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll @@ -65,7 +65,7 @@ vector.body: ; preds = %vector.body, %vecto %0 = getelementptr inbounds i32, i32* %x, i32 %index %1 = bitcast i32* %0 to <4 x i32>* %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 - %2 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %wide.load) + %2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load) %3 = add i32 %2, %vec.phi %index.next = add i32 %index, 4 %4 = icmp eq i32 %index.next, %n.vec @@ -167,7 +167,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %3, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %2) + %4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %2) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -267,7 +267,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %3, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %2) + %4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %2) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -367,7 +367,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %3, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %2) + %4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %2) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -467,7 +467,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %3, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %2) + %4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %2) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -568,7 +568,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %3, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %4 = call fast float 
@llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %2) + %4 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %2) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -665,7 +665,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %3, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %4 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> %2) + %4 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.000000e+00, <4 x float> %2) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -762,7 +762,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %4, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %5 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %3) + %5 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %3) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -852,7 +852,7 @@ vector.body: ; preds = %vector.body, %vecto %0 = getelementptr inbounds i32, i32* %x, i32 %index %1 = bitcast i32* %0 to <4 x i32>* %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 - %l5 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %wide.load) + %l5 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %wide.load) %2 = icmp slt i32 %vec.phi, %l5 %3 = select i1 %2, i32 %vec.phi, i32 %l5 %index.next = add i32 %index, 4 @@ -958,7 +958,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %4, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %5 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %3) + %5 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %3) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -1048,7 +1048,7 @@ vector.body: ; preds = %vector.body, %vecto %0 = getelementptr inbounds i32, i32* %x, i32 %index %1 = bitcast i32* %0 to <4 x i32>* %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 - %l5 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %wide.load) + %l5 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %wide.load) %2 = icmp sgt i32 %vec.phi, %l5 %3 = select i1 %2, i32 %vec.phi, i32 %l5 %index.next = add i32 %index, 4 @@ -1154,7 +1154,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %4, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %5 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %3) + %5 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %3) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -1244,7 +1244,7 @@ vector.body: ; preds = %vector.body, %vecto %0 = getelementptr inbounds i32, i32* %x, i32 %index %1 = bitcast i32* %0 to <4 x i32>* %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 - %l5 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %wide.load) + %l5 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %wide.load) %2 = icmp ult i32 %vec.phi, %l5 %3 = select i1 %2, i32 %vec.phi, i32 %l5 %index.next = add i32 %index, 4 @@ -1350,7 +1350,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %4, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %5 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %3) + %5 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x 
i32> %3) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -1440,7 +1440,7 @@ vector.body: ; preds = %vector.body, %vecto %0 = getelementptr inbounds i32, i32* %x, i32 %index %1 = bitcast i32* %0 to <4 x i32>* %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 - %l5 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %wide.load) + %l5 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %wide.load) %2 = icmp ugt i32 %vec.phi, %l5 %3 = select i1 %2, i32 %vec.phi, i32 %l5 %index.next = add i32 %index, 4 @@ -1553,7 +1553,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %4, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %5 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %3) + %5 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %3) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -1658,7 +1658,7 @@ vector.body: ; preds = %vector.body, %vecto br i1 %4, label %middle.block, label %vector.body middle.block: ; preds = %vector.body - %5 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %3) + %5 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %3) %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -1722,7 +1722,7 @@ vector.body: ; preds = %vector.body, %vecto %1 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer - %3 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %2) + %3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2) %4 = add i32 %3, %vec.phi %index.next = add i32 %index, 4 %5 = icmp eq i32 %index.next, %n.vec @@ -1777,7 +1777,7 @@ vector.body: ; preds = %vector.body, %vecto %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %4 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load %5 = select <4 x i1> %active.lane.mask, <4 x i32> %4, <4 x i32> zeroinitializer - %6 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %5) + %6 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %5) %7 = add i32 %6, %vec.phi %index.next = add i32 %index, 4 %8 = icmp eq i32 %index.next, %n.vec @@ -1828,7 +1828,7 @@ vector.body: ; preds = %vector.body, %vecto %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %2 = sext <8 x i16> %wide.masked.load to <8 x i32> %3 = select <8 x i1> %active.lane.mask, <8 x i32> %2, <8 x i32> zeroinitializer - %4 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %3) + %4 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %3) %5 = add i32 %4, %vec.phi %index.next = add i32 %index, 8 %6 = icmp eq i32 %index.next, %n.vec @@ -1885,7 +1885,7 @@ vector.body: ; preds = %vector.body, %vecto %5 = sext <8 x i16> %wide.masked.load14 to <8 x i32> %6 = mul nsw <8 x i32> %5, %2 %7 = select <8 x i1> %active.lane.mask, <8 x i32> %6, <8 x i32> zeroinitializer - %8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %7) + %8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %7) %9 = add i32 %8, %vec.phi %index.next = add i32 %index, 8 %10 = icmp eq i32 %index.next, %n.vec @@ -1936,7 +1936,7 @@ vector.body: ; preds = 
%vector.body, %vecto %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %2 = zext <16 x i8> %wide.masked.load to <16 x i32> %3 = select <16 x i1> %active.lane.mask, <16 x i32> %2, <16 x i32> zeroinitializer - %4 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %3) + %4 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3) %5 = add i32 %4, %vec.phi %index.next = add i32 %index, 16 %6 = icmp eq i32 %index.next, %n.vec @@ -1993,7 +1993,7 @@ vector.body: ; preds = %vector.body, %vecto %5 = zext <16 x i8> %wide.masked.load14 to <16 x i32> %6 = mul nuw nsw <16 x i32> %5, %2 %7 = select <16 x i1> %active.lane.mask, <16 x i32> %6, <16 x i32> zeroinitializer - %8 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %7) + %8 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %7) %9 = add i32 %8, %vec.phi %index.next = add i32 %index, 16 %10 = icmp eq i32 %index.next, %n.vec @@ -2043,7 +2043,7 @@ vector.body: ; preds = %vector.body, %vecto %1 = bitcast i16* %0 to <8 x i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %2 = select <8 x i1> %active.lane.mask, <8 x i16> %wide.masked.load, <8 x i16> zeroinitializer - %3 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %2) + %3 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %2) %4 = add i16 %3, %vec.phi %index.next = add i32 %index, 8 %5 = icmp eq i32 %index.next, %n.vec @@ -2098,7 +2098,7 @@ vector.body: ; preds = %vector.body, %vecto %wide.masked.load16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %3, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %4 = mul <8 x i16> %wide.masked.load16, %wide.masked.load %5 = select <8 x i1> %active.lane.mask, <8 x i16> %4, <8 x i16> zeroinitializer - %6 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %5) + %6 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %5) %7 = add i16 %6, %vec.phi %index.next = add i32 %index, 8 %8 = icmp eq i32 %index.next, %n.vec @@ -2149,7 +2149,7 @@ vector.body: ; preds = %vector.body, %vecto %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %2 = zext <16 x i8> %wide.masked.load to <16 x i16> %3 = select <16 x i1> %active.lane.mask, <16 x i16> %2, <16 x i16> zeroinitializer - %4 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %3) + %4 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %3) %5 = add i16 %4, %vec.phi %index.next = add i32 %index, 16 %6 = icmp eq i32 %index.next, %n.vec @@ -2206,7 +2206,7 @@ vector.body: ; preds = %vector.body, %vecto %5 = zext <16 x i8> %wide.masked.load18 to <16 x i16> %6 = mul nuw <16 x i16> %5, %2 %7 = select <16 x i1> %active.lane.mask, <16 x i16> %6, <16 x i16> zeroinitializer - %8 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %7) + %8 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %7) %9 = add i16 %8, %vec.phi %index.next = add i32 %index, 16 %10 = icmp eq i32 %index.next, %n.vec @@ -2256,7 +2256,7 @@ vector.body: ; preds = %vector.body, %vecto %1 = bitcast i8* %0 to <16 x i8>* %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %2 = select <16 x i1> %active.lane.mask, <16 x i8> %wide.masked.load, <16 x i8> zeroinitializer - %3 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x 
i8> %2) + %3 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %2) %4 = add i8 %3, %vec.phi %index.next = add i32 %index, 16 %5 = icmp eq i32 %index.next, %n.vec @@ -2311,7 +2311,7 @@ vector.body: ; preds = %vector.body, %vecto %wide.masked.load15 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %4 = mul <16 x i8> %wide.masked.load15, %wide.masked.load %5 = select <16 x i1> %active.lane.mask, <16 x i8> %4, <16 x i8> zeroinitializer - %6 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %5) + %6 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %5) %7 = add i8 %6, %vec.phi %index.next = add i32 %index, 16 %8 = icmp eq i32 %index.next, %n.vec @@ -2364,7 +2364,7 @@ vector.body: ; preds = %vector.body, %vecto %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = sext <4 x i32> %wide.masked.load to <4 x i64> %3 = select <4 x i1> %active.lane.mask, <4 x i64> %2, <4 x i64> zeroinitializer - %4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %3) + %4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %3) %5 = add i64 %4, %vec.phi %index.next = add i32 %index, 4 %6 = icmp eq i32 %index.next, %n.vec @@ -2423,7 +2423,7 @@ vector.body: ; preds = %vector.body, %vecto %5 = sext <4 x i32> %wide.masked.load14 to <4 x i64> %6 = mul nsw <4 x i64> %5, %2 %7 = select <4 x i1> %active.lane.mask, <4 x i64> %6, <4 x i64> zeroinitializer - %8 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %7) + %8 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %7) %9 = add i64 %8, %vec.phi %index.next = add i32 %index, 4 %10 = icmp eq i32 %index.next, %n.vec @@ -2482,7 +2482,7 @@ vector.body: ; preds = %vector.body, %vecto %5 = sext <8 x i16> %wide.masked.load14 to <8 x i64> %6 = mul nsw <8 x i64> %5, %2 %7 = select <8 x i1> %active.lane.mask, <8 x i64> %6, <8 x i64> zeroinitializer - %8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %7) + %8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %7) %9 = add i64 %8, %vec.phi %index.next = add i32 %index, 8 %10 = icmp eq i32 %index.next, %n.vec @@ -2497,26 +2497,26 @@ declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #1 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) #1 declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) #2 -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) #3 +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) #3 declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) #1 declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #2 -declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) #3 -declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) #3 -declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) #3 -declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) #3 -declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) #3 -declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) #3 - -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>) -declare 
i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) -declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>) -declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) -declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) #3 +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #3 +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) #3 +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #3 +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) #3 +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) #3 + +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>) +declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>) +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll index b83b51b6f564f..ee15f82a71f5b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll @@ -8,7 +8,7 @@ define arm_aapcs_vfpcc i32 @add_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-NEXT: bx lr entry: %m = mul <4 x i32> %x, %y - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m) ret i32 %z } @@ -21,7 +21,7 @@ entry: %xx = zext <4 x i32> %x to <4 x i64> %yy = zext <4 x i32> %y to <4 x i64> %m = mul <4 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %m) ret i64 %z } @@ -34,7 +34,7 @@ entry: %xx = sext <4 x i32> %x to <4 x i64> %yy = sext <4 x i32> %y to <4 x i64> %m = mul <4 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %m) ret i64 %z } @@ -53,7 +53,7 @@ entry: %xx = zext <2 x i32> %x to <2 x i64> %yy = zext <2 x i32> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) ret i64 %z } @@ -72,7 +72,7 @@ entry: %xx = sext <2 x i32> %x to <2 x i64> %yy = sext <2 x i32> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) ret i64 %z } @@ -85,7 +85,7 @@ entry: %xx = zext <8 x i16> %x to <8 x i32> %yy = zext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy - %z = call i32 
@llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %m) ret i32 %z } @@ -98,7 +98,7 @@ entry: %xx = sext <8 x i16> %x to <8 x i32> %yy = sext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %m) ret i32 %z } @@ -113,7 +113,7 @@ entry: %xx = zext <4 x i16> %x to <4 x i32> %yy = zext <4 x i16> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m) ret i32 %z } @@ -128,7 +128,7 @@ entry: %xx = sext <4 x i16> %x to <4 x i32> %yy = sext <4 x i16> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m) ret i32 %z } @@ -140,7 +140,7 @@ define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) ; CHECK-NEXT: bx lr entry: %m = mul <8 x i16> %x, %y - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m) ret i16 %z } @@ -153,7 +153,7 @@ entry: %xx = zext <8 x i16> %x to <8 x i64> %yy = zext <8 x i16> %y to <8 x i64> %m = mul <8 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %m) ret i64 %z } @@ -166,7 +166,7 @@ entry: %xx = sext <8 x i16> %x to <8 x i64> %yy = sext <8 x i16> %y to <8 x i64> %m = mul <8 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %m) ret i64 %z } @@ -180,7 +180,7 @@ entry: %yy = zext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy %ma = zext <8 x i32> %m to <8 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma) ret i64 %z } @@ -194,7 +194,7 @@ entry: %yy = sext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy %ma = sext <8 x i32> %m to <8 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma) ret i64 %z } @@ -207,7 +207,7 @@ entry: %xx = sext <8 x i16> %x to <8 x i32> %m = mul <8 x i32> %xx, %xx %ma = zext <8 x i32> %m to <8 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma) ret i64 %z } @@ -228,7 +228,7 @@ entry: %xx = zext <2 x i16> %x to <2 x i64> %yy = zext <2 x i16> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) ret i64 %z } @@ -250,7 +250,7 @@ entry: %xx = sext <2 x i16> %x to <2 x i64> %yy = sext <2 x i16> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) ret i64 %z } @@ -263,7 +263,7 @@ entry: %xx = zext <16 x i8> %x to <16 x i32> %yy = zext <16 x i8> %y to <16 x i32> %m = mul <16 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %m) ret i32 %z } @@ -276,7 +276,7 @@ entry: %xx = sext <16 x i8> %x to <16 x i32> %yy = sext <16 x i8> %y 
to <16 x i32> %m = mul <16 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %m) ret i32 %z } @@ -290,7 +290,7 @@ entry: %yy = zext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy %ma = zext <16 x i16> %m to <16 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma) ret i32 %z } @@ -304,7 +304,7 @@ entry: %yy = sext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy %ma = sext <16 x i16> %m to <16 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma) ret i32 %z } @@ -317,7 +317,7 @@ entry: %xx = sext <16 x i8> %x to <16 x i16> %m = mul <16 x i16> %xx, %xx %ma = zext <16 x i16> %m to <16 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma) ret i32 %z } @@ -333,7 +333,7 @@ entry: %xx = zext <4 x i8> %x to <4 x i32> %yy = zext <4 x i8> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m) ret i32 %z } @@ -350,7 +350,7 @@ entry: %xx = sext <4 x i8> %x to <4 x i32> %yy = sext <4 x i8> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m) ret i32 %z } @@ -364,7 +364,7 @@ entry: %xx = zext <16 x i8> %x to <16 x i16> %yy = zext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %m) ret i16 %z } @@ -378,7 +378,7 @@ entry: %xx = sext <16 x i8> %x to <16 x i16> %yy = sext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %m) ret i16 %z } @@ -394,7 +394,7 @@ entry: %xx = zext <8 x i8> %x to <8 x i16> %yy = zext <8 x i8> %y to <8 x i16> %m = mul <8 x i16> %xx, %yy - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m) ret i16 %z } @@ -410,7 +410,7 @@ entry: %xx = sext <8 x i8> %x to <8 x i16> %yy = sext <8 x i8> %y to <8 x i16> %m = mul <8 x i16> %xx, %yy - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m) ret i16 %z } @@ -422,7 +422,7 @@ define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) { ; CHECK-NEXT: bx lr entry: %m = mul <16 x i8> %x, %y - %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %m) + %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %m) ret i8 %z } @@ -636,7 +636,7 @@ entry: %xx = zext <16 x i8> %x to <16 x i64> %yy = zext <16 x i8> %y to <16 x i64> %m = mul <16 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %m) ret i64 %z } @@ -803,7 +803,7 @@ entry: %xx = sext <16 x i8> %x to <16 x i64> %yy = sext <16 x i8> %y to <16 x i64> %m = mul <16 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %m) 
ret i64 %z } @@ -826,7 +826,7 @@ entry: %xx = zext <2 x i8> %x to <2 x i64> %yy = zext <2 x i8> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) ret i64 %z } @@ -848,7 +848,7 @@ entry: %xx = sext <2 x i8> %x to <2 x i64> %yy = sext <2 x i8> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) ret i64 %z } @@ -879,7 +879,7 @@ define arm_aapcs_vfpcc i64 @add_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) { ; CHECK-NEXT: pop {r4, pc} entry: %m = mul <2 x i64> %x, %y - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) ret i64 %z } @@ -890,7 +890,7 @@ define arm_aapcs_vfpcc i32 @add_v4i32_v4i32_acc(<4 x i32> %x, <4 x i32> %y, i32 ; CHECK-NEXT: bx lr entry: %m = mul <4 x i32> %x, %y - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m) %r = add i32 %z, %a ret i32 %r } @@ -904,7 +904,7 @@ entry: %xx = zext <4 x i32> %x to <4 x i64> %yy = zext <4 x i32> %y to <4 x i64> %m = mul <4 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -918,7 +918,7 @@ entry: %xx = sext <4 x i32> %x to <4 x i64> %yy = sext <4 x i32> %y to <4 x i64> %m = mul <4 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -942,7 +942,7 @@ entry: %xx = zext <2 x i32> %x to <2 x i64> %yy = zext <2 x i32> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -966,7 +966,7 @@ entry: %xx = sext <2 x i32> %x to <2 x i64> %yy = sext <2 x i32> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -980,7 +980,7 @@ entry: %xx = zext <8 x i16> %x to <8 x i32> %yy = zext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %m) %r = add i32 %z, %a ret i32 %r } @@ -994,7 +994,7 @@ entry: %xx = sext <8 x i16> %x to <8 x i32> %yy = sext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %m) %r = add i32 %z, %a ret i32 %r } @@ -1010,7 +1010,7 @@ entry: %xx = zext <4 x i16> %x to <4 x i32> %yy = zext <4 x i16> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m) %r = add i32 %z, %a ret i32 %r } @@ -1026,7 +1026,7 @@ entry: %xx = sext <4 x i16> %x to <4 x i32> %yy = sext <4 x i16> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m) %r = add i32 %z, %a ret i32 %r } @@ -1039,7 
+1039,7 @@ define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, <8 x i16> ; CHECK-NEXT: bx lr entry: %m = mul <8 x i16> %x, %y - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m) %r = add i16 %z, %a ret i16 %r } @@ -1053,7 +1053,7 @@ entry: %xx = zext <8 x i16> %x to <8 x i64> %yy = zext <8 x i16> %y to <8 x i64> %m = mul <8 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -1067,7 +1067,7 @@ entry: %xx = sext <8 x i16> %x to <8 x i64> %yy = sext <8 x i16> %y to <8 x i64> %m = mul <8 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -1082,7 +1082,7 @@ entry: %yy = zext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy %ma = zext <8 x i32> %m to <8 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma) %r = add i64 %z, %a ret i64 %r } @@ -1097,7 +1097,7 @@ entry: %yy = sext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy %ma = sext <8 x i32> %m to <8 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma) %r = add i64 %z, %a ret i64 %r } @@ -1111,7 +1111,7 @@ entry: %xx = sext <8 x i16> %x to <8 x i32> %m = mul <8 x i32> %xx, %xx %ma = zext <8 x i32> %m to <8 x i64> - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma) %r = add i64 %z, %a ret i64 %r } @@ -1137,7 +1137,7 @@ entry: %xx = zext <2 x i16> %x to <2 x i64> %yy = zext <2 x i16> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -1164,7 +1164,7 @@ entry: %xx = sext <2 x i16> %x to <2 x i64> %yy = sext <2 x i16> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -1178,7 +1178,7 @@ entry: %xx = zext <16 x i8> %x to <16 x i32> %yy = zext <16 x i8> %y to <16 x i32> %m = mul <16 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %m) %r = add i32 %z, %a ret i32 %r } @@ -1192,7 +1192,7 @@ entry: %xx = sext <16 x i8> %x to <16 x i32> %yy = sext <16 x i8> %y to <16 x i32> %m = mul <16 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %m) %r = add i32 %z, %a ret i32 %r } @@ -1207,7 +1207,7 @@ entry: %yy = zext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy %ma = zext <16 x i16> %m to <16 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma) %r = add i32 %z, %a ret i32 %r } @@ -1222,7 +1222,7 @@ entry: %yy = sext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy %ma = sext <16 x i16> %m to <16 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + %z = call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> %ma) %r = add i32 %z, %a ret i32 %r } @@ -1236,7 +1236,7 @@ entry: %xx = sext <16 x i8> %x to <16 x i16> %m = mul <16 x i16> %xx, %xx %ma = zext <16 x i16> %m to <16 x i32> - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma) %r = add i32 %z, %a ret i32 %r } @@ -1253,7 +1253,7 @@ entry: %xx = zext <4 x i8> %x to <4 x i32> %yy = zext <4 x i8> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m) %r = add i32 %z, %a ret i32 %r } @@ -1271,7 +1271,7 @@ entry: %xx = sext <4 x i8> %x to <4 x i32> %yy = sext <4 x i8> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m) %r = add i32 %z, %a ret i32 %r } @@ -1286,7 +1286,7 @@ entry: %xx = zext <16 x i8> %x to <16 x i16> %yy = zext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %m) %r = add i16 %z, %a ret i16 %r } @@ -1301,7 +1301,7 @@ entry: %xx = sext <16 x i8> %x to <16 x i16> %yy = sext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %m) %r = add i16 %z, %a ret i16 %r } @@ -1318,7 +1318,7 @@ entry: %xx = zext <8 x i8> %x to <8 x i16> %yy = zext <8 x i8> %y to <8 x i16> %m = mul <8 x i16> %xx, %yy - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m) %r = add i16 %z, %a ret i16 %r } @@ -1335,7 +1335,7 @@ entry: %xx = sext <8 x i8> %x to <8 x i16> %yy = sext <8 x i8> %y to <8 x i16> %m = mul <8 x i16> %xx, %yy - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m) %r = add i16 %z, %a ret i16 %r } @@ -1348,7 +1348,7 @@ define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, <16 x i8> % ; CHECK-NEXT: bx lr entry: %m = mul <16 x i8> %x, %y - %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %m) + %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %m) %r = add i8 %z, %a ret i8 %r } @@ -1565,7 +1565,7 @@ entry: %xx = zext <16 x i8> %x to <16 x i64> %yy = zext <16 x i8> %y to <16 x i64> %m = mul <16 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -1737,7 +1737,7 @@ entry: %xx = sext <16 x i8> %x to <16 x i64> %yy = sext <16 x i8> %y to <16 x i64> %m = mul <16 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -1765,7 +1765,7 @@ entry: %xx = zext <2 x i8> %x to <2 x i64> %yy = zext <2 x i8> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -1792,7 +1792,7 @@ entry: %xx = sext <2 x i8> %x to <2 x i64> %yy = sext <2 x i8> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy - %z = call i64 
@llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) %r = add i64 %z, %a ret i64 %r } @@ -1826,18 +1826,18 @@ define arm_aapcs_vfpcc i64 @add_v2i64_v2i64_acc(<2 x i64> %x, <2 x i64> %y, i64 ; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %m = mul <2 x i64> %x, %y - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m) %r = add i64 %z, %a ret i64 %r } -declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) -declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) -declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll index 02d124890c6bb..72462bb87f022 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll @@ -11,7 +11,7 @@ entry: %c = icmp eq <4 x i32> %b, zeroinitializer %m = mul <4 x i32> %x, %y %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) ret i32 %z } @@ -27,7 +27,7 @@ entry: %yy = zext <4 x i32> %y to <4 x i64> %m = mul <4 x i64> %xx, %yy %s = select <4 x i1> %c, <4 x i64> %m, <4 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s) ret i64 %z } @@ -43,7 +43,7 @@ entry: %yy = sext <4 x i32> %y to <4 x i64> %m = mul <4 x i64> %xx, %yy %s = select <4 x i1> %c, <4 x i64> %m, <4 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s) ret i64 %z } @@ -79,7 +79,7 @@ entry: %yy = zext <2 x i32> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -115,7 +115,7 @@ entry: %yy = sext <2 x i32> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -131,7 +131,7 @@ entry: %yy = zext <8 x i16> %y to <8 x i32> %m 
= mul <8 x i32> %xx, %yy %s = select <8 x i1> %c, <8 x i32> %m, <8 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s) ret i32 %z } @@ -147,7 +147,7 @@ entry: %yy = sext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy %s = select <8 x i1> %c, <8 x i32> %m, <8 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s) ret i32 %z } @@ -166,7 +166,7 @@ entry: %yy = zext <4 x i16> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) ret i32 %z } @@ -185,7 +185,7 @@ entry: %yy = sext <4 x i16> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) ret i32 %z } @@ -200,7 +200,7 @@ entry: %c = icmp eq <8 x i16> %b, zeroinitializer %m = mul <8 x i16> %x, %y %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) ret i16 %z } @@ -216,7 +216,7 @@ entry: %yy = zext <8 x i16> %y to <8 x i64> %m = mul <8 x i64> %xx, %yy %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) ret i64 %z } @@ -232,7 +232,7 @@ entry: %yy = sext <8 x i16> %y to <8 x i64> %m = mul <8 x i64> %xx, %yy %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) ret i64 %z } @@ -249,7 +249,7 @@ entry: %m = mul <8 x i32> %xx, %yy %ma = zext <8 x i32> %m to <8 x i64> %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) ret i64 %z } @@ -266,7 +266,7 @@ entry: %m = mul <8 x i32> %xx, %yy %ma = sext <8 x i32> %m to <8 x i64> %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) ret i64 %z } @@ -282,7 +282,7 @@ entry: %m = mul <8 x i32> %xx, %xx %ma = zext <8 x i32> %m to <8 x i64> %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) ret i64 %z } @@ -334,7 +334,7 @@ entry: %yy = zext <2 x i16> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -385,7 +385,7 @@ entry: %yy = sext <2 x i16> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x 
i64> %s) ret i64 %z } @@ -401,7 +401,7 @@ entry: %yy = zext <16 x i8> %y to <16 x i32> %m = mul <16 x i32> %xx, %yy %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) ret i32 %z } @@ -417,7 +417,7 @@ entry: %yy = sext <16 x i8> %y to <16 x i32> %m = mul <16 x i32> %xx, %yy %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) ret i32 %z } @@ -434,7 +434,7 @@ entry: %m = mul <16 x i16> %xx, %yy %ma = zext <16 x i16> %m to <16 x i32> %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) ret i32 %z } @@ -451,7 +451,7 @@ entry: %m = mul <16 x i16> %xx, %yy %ma = sext <16 x i16> %m to <16 x i32> %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) ret i32 %z } @@ -467,7 +467,7 @@ entry: %m = mul <16 x i16> %xx, %xx %ma = zext <16 x i16> %m to <16 x i32> %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) ret i32 %z } @@ -487,7 +487,7 @@ entry: %yy = zext <4 x i8> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) ret i32 %z } @@ -509,7 +509,7 @@ entry: %yy = sext <4 x i8> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) ret i32 %z } @@ -526,7 +526,7 @@ entry: %yy = zext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy %s = select <16 x i1> %c, <16 x i16> %m, <16 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s) ret i16 %z } @@ -543,7 +543,7 @@ entry: %yy = sext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy %s = select <16 x i1> %c, <16 x i16> %m, <16 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s) ret i16 %z } @@ -563,7 +563,7 @@ entry: %yy = zext <8 x i8> %y to <8 x i16> %m = mul <8 x i16> %xx, %yy %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) ret i16 %z } @@ -583,7 +583,7 @@ entry: %yy = sext <8 x i8> %y to <8 x i16> %m = mul <8 x i16> %xx, %yy %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) ret i16 %z } @@ -598,7 +598,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %m = mul <16 x i8> %x, %y %s = select <16 x i1> %c, <16 x i8> 
%m, <16 x i8> zeroinitializer - %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %s) + %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s) ret i8 %z } @@ -1010,7 +1010,7 @@ entry: %yy = zext <16 x i8> %y to <16 x i64> %m = mul <16 x i64> %xx, %yy %s = select <16 x i1> %c, <16 x i64> %m, <16 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) ret i64 %z } @@ -1353,7 +1353,7 @@ entry: %yy = sext <16 x i8> %y to <16 x i64> %m = mul <16 x i64> %xx, %yy %s = select <16 x i1> %c, <16 x i64> %m, <16 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) ret i64 %z } @@ -1405,7 +1405,7 @@ entry: %yy = zext <2 x i8> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -1456,7 +1456,7 @@ entry: %yy = sext <2 x i8> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -1509,7 +1509,7 @@ entry: %c = icmp eq <2 x i64> %b, zeroinitializer %m = mul <2 x i64> %x, %y %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) ret i64 %z } @@ -1523,7 +1523,7 @@ entry: %c = icmp eq <4 x i32> %b, zeroinitializer %m = mul <4 x i32> %x, %y %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1540,7 +1540,7 @@ entry: %yy = zext <4 x i32> %y to <4 x i64> %m = mul <4 x i64> %xx, %yy %s = select <4 x i1> %c, <4 x i64> %m, <4 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1557,7 +1557,7 @@ entry: %yy = sext <4 x i32> %y to <4 x i64> %m = mul <4 x i64> %xx, %yy %s = select <4 x i1> %c, <4 x i64> %m, <4 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1598,7 +1598,7 @@ entry: %yy = zext <2 x i32> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1639,7 +1639,7 @@ entry: %yy = sext <2 x i32> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1656,7 +1656,7 @@ entry: %yy = zext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy %s = select <8 x i1> %c, <8 x i32> %m, <8 x i32> zeroinitializer - %z = call i32 
@llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1673,7 +1673,7 @@ entry: %yy = sext <8 x i16> %y to <8 x i32> %m = mul <8 x i32> %xx, %yy %s = select <8 x i1> %c, <8 x i32> %m, <8 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1693,7 +1693,7 @@ entry: %yy = zext <4 x i16> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1713,7 +1713,7 @@ entry: %yy = sext <4 x i16> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1729,7 +1729,7 @@ entry: %c = icmp eq <8 x i16> %b, zeroinitializer %m = mul <8 x i16> %x, %y %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) %r = add i16 %z, %a ret i16 %r } @@ -1746,7 +1746,7 @@ entry: %yy = zext <8 x i16> %y to <8 x i64> %m = mul <8 x i64> %xx, %yy %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1763,7 +1763,7 @@ entry: %yy = sext <8 x i16> %y to <8 x i64> %m = mul <8 x i64> %xx, %yy %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1781,7 +1781,7 @@ entry: %m = mul <8 x i32> %xx, %yy %ma = zext <8 x i32> %m to <8 x i64> %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1799,7 +1799,7 @@ entry: %m = mul <8 x i32> %xx, %yy %ma = sext <8 x i32> %m to <8 x i64> %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1816,7 +1816,7 @@ entry: %m = mul <8 x i32> %xx, %xx %ma = zext <8 x i32> %m to <8 x i64> %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1873,7 +1873,7 @@ entry: %yy = zext <2 x i16> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1929,7 +1929,7 @@ entry: %yy = sext <2 x i16> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> 
zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -1946,7 +1946,7 @@ entry: %yy = zext <16 x i8> %y to <16 x i32> %m = mul <16 x i32> %xx, %yy %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1963,7 +1963,7 @@ entry: %yy = sext <16 x i8> %y to <16 x i32> %m = mul <16 x i32> %xx, %yy %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1981,7 +1981,7 @@ entry: %m = mul <16 x i16> %xx, %yy %ma = zext <16 x i16> %m to <16 x i32> %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -1999,7 +1999,7 @@ entry: %m = mul <16 x i16> %xx, %yy %ma = sext <16 x i16> %m to <16 x i32> %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -2016,7 +2016,7 @@ entry: %m = mul <16 x i16> %xx, %xx %ma = zext <16 x i16> %m to <16 x i32> %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -2037,7 +2037,7 @@ entry: %yy = zext <4 x i8> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -2060,7 +2060,7 @@ entry: %yy = sext <4 x i8> %y to <4 x i32> %m = mul <4 x i32> %xx, %yy %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s) + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) %r = add i32 %z, %a ret i32 %r } @@ -2078,7 +2078,7 @@ entry: %yy = zext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy %s = select <16 x i1> %c, <16 x i16> %m, <16 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s) %r = add i16 %z, %a ret i16 %r } @@ -2096,7 +2096,7 @@ entry: %yy = sext <16 x i8> %y to <16 x i16> %m = mul <16 x i16> %xx, %yy %s = select <16 x i1> %c, <16 x i16> %m, <16 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s) %r = add i16 %z, %a ret i16 %r } @@ -2117,7 +2117,7 @@ entry: %yy = zext <8 x i8> %y to <8 x i16> %m = mul <8 x i16> %xx, %yy %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) %r = add i16 %z, %a ret i16 %r } @@ -2138,7 +2138,7 @@ entry: %yy = sext <8 x i8> %y to 
<8 x i16> %m = mul <8 x i16> %xx, %yy %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer - %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s) + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) %r = add i16 %z, %a ret i16 %r } @@ -2154,7 +2154,7 @@ entry: %c = icmp eq <16 x i8> %b, zeroinitializer %m = mul <16 x i8> %x, %y %s = select <16 x i1> %c, <16 x i8> %m, <16 x i8> zeroinitializer - %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %s) + %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s) %r = add i8 %z, %a ret i8 %r } @@ -2569,7 +2569,7 @@ entry: %yy = zext <16 x i8> %y to <16 x i64> %m = mul <16 x i64> %xx, %yy %s = select <16 x i1> %c, <16 x i64> %m, <16 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -2917,7 +2917,7 @@ entry: %yy = sext <16 x i8> %y to <16 x i64> %m = mul <16 x i64> %xx, %yy %s = select <16 x i1> %c, <16 x i64> %m, <16 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -2974,7 +2974,7 @@ entry: %yy = zext <2 x i8> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -3030,7 +3030,7 @@ entry: %yy = sext <2 x i8> %y to <2 x i64> %m = mul <2 x i64> %xx, %yy %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } @@ -3088,18 +3088,18 @@ entry: %c = icmp eq <2 x i64> %b, zeroinitializer %m = mul <2 x i64> %x, %y %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s) + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) %r = add i64 %z, %a ret i64 %r } -declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) -declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) -declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mul.ll 
b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mul.ll index 7510169fbbd5c..4ff682126adaf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mul.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mul.ll @@ -9,7 +9,7 @@ define arm_aapcs_vfpcc i32 @mul_v2i32(<2 x i32> %x) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> %x) + %z = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %x) ret i32 %z } @@ -25,7 +25,7 @@ define arm_aapcs_vfpcc i32 @mul_v4i32(<4 x i32> %x) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %x) + %z = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %x) ret i32 %z } @@ -42,7 +42,7 @@ define arm_aapcs_vfpcc i32 @mul_v8i32(<8 x i32> %x) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> %x) + %z = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %x) ret i32 %z } @@ -58,7 +58,7 @@ define arm_aapcs_vfpcc i16 @mul_v4i16(<4 x i16> %x) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> %x) + %z = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %x) ret i16 %z } @@ -76,7 +76,7 @@ define arm_aapcs_vfpcc i16 @mul_v8i16(<8 x i16> %x) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> %x) + %z = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %x) ret i16 %z } @@ -95,7 +95,7 @@ define arm_aapcs_vfpcc i16 @mul_v16i16(<16 x i16> %x) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> %x) + %z = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %x) ret i16 %z } @@ -113,7 +113,7 @@ define arm_aapcs_vfpcc i8 @mul_v8i8(<8 x i8> %x) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> %x) + %z = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %x) ret i8 %z } @@ -133,7 +133,7 @@ define arm_aapcs_vfpcc i8 @mul_v16i8(<16 x i8> %x) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> %x) + %z = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %x) ret i8 %z } @@ -154,7 +154,7 @@ define arm_aapcs_vfpcc i8 @mul_v32i8(<32 x i8> %x) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> %x) + %z = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %x) ret i8 %z } @@ -163,7 +163,7 @@ define arm_aapcs_vfpcc i64 @mul_v1i64(<1 x i64> %x) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: bx lr entry: - %z = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> %x) + %z = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> %x) ret i64 %z } @@ -179,7 +179,7 @@ define arm_aapcs_vfpcc i64 @mul_v2i64(<2 x i64> %x) { ; CHECK-NEXT: mla r1, r3, r1, r2 ; CHECK-NEXT: bx lr entry: - %z = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %x) + %z = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %x) ret i64 %z } @@ -207,7 +207,7 @@ define arm_aapcs_vfpcc i64 @mul_v4i64(<4 x i64> %x) { ; CHECK-NEXT: mla r1, r1, r6, r4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: - %z = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> %x) + %z = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %x) ret i64 %z } @@ -220,7 +220,7 @@ define 
arm_aapcs_vfpcc i32 @mul_v2i32_acc(<2 x i32> %x, i32 %y) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> %x) + %z = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %x) %r = mul i32 %y, %z ret i32 %r } @@ -238,7 +238,7 @@ define arm_aapcs_vfpcc i32 @mul_v4i32_acc(<4 x i32> %x, i32 %y) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %x) + %z = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %x) %r = mul i32 %y, %z ret i32 %r } @@ -257,7 +257,7 @@ define arm_aapcs_vfpcc i32 @mul_v8i32_acc(<8 x i32> %x, i32 %y) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> %x) + %z = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %x) %r = mul i32 %y, %z ret i32 %r } @@ -275,7 +275,7 @@ define arm_aapcs_vfpcc i16 @mul_v4i16_acc(<4 x i16> %x, i16 %y) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> %x) + %z = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %x) %r = mul i16 %y, %z ret i16 %r } @@ -295,7 +295,7 @@ define arm_aapcs_vfpcc i16 @mul_v8i16_acc(<8 x i16> %x, i16 %y) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> %x) + %z = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %x) %r = mul i16 %y, %z ret i16 %r } @@ -316,7 +316,7 @@ define arm_aapcs_vfpcc i16 @mul_v16i16_acc(<16 x i16> %x, i16 %y) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> %x) + %z = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %x) %r = mul i16 %y, %z ret i16 %r } @@ -336,7 +336,7 @@ define arm_aapcs_vfpcc i8 @mul_v8i8_acc(<8 x i8> %x, i8 %y) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> %x) + %z = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %x) %r = mul i8 %y, %z ret i8 %r } @@ -358,7 +358,7 @@ define arm_aapcs_vfpcc i8 @mul_v16i8_acc(<16 x i8> %x, i8 %y) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> %x) + %z = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %x) %r = mul i8 %y, %z ret i8 %r } @@ -381,7 +381,7 @@ define arm_aapcs_vfpcc i8 @mul_v32i8_acc(<32 x i8> %x, i8 %y) { ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr entry: - %z = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> %x) + %z = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %x) %r = mul i8 %y, %z ret i8 %r } @@ -397,7 +397,7 @@ define arm_aapcs_vfpcc i64 @mul_v1i64_acc(<1 x i64> %x, i64 %y) { ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: pop {r7, pc} entry: - %z = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> %x) + %z = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> %x) %r = mul i64 %y, %z ret i64 %r } @@ -420,7 +420,7 @@ define arm_aapcs_vfpcc i64 @mul_v2i64_acc(<2 x i64> %x, i64 %y) { ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: pop {r4, pc} entry: - %z = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %x) + %z = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %x) %r = mul i64 %y, %z ret i64 %r } @@ -453,20 +453,20 @@ define arm_aapcs_vfpcc i64 @mul_v4i64_acc(<4 x i64> %x, i64 %y) { ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: - %z = call i64 
@llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> %x) + %z = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %x) %r = mul i64 %y, %z ret i64 %r } -declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>) -declare i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>) -declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>) +declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.mul.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>) +declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>) diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll index 80c8ae65e4b7f..9502716bc98cf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll @@ -1,18 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s -declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) -declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) -declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) -declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) +declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) +declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) +declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) +declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8(<16 x i8> %s1) { ; CHECK-LABEL: 
vmaxv_s_v16i8: @@ -20,7 +20,7 @@ define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8(<16 x i8> %s1) { ; CHECK-NEXT: mvn r0, #127 ; CHECK-NEXT: vmaxv.s8 r0, q0 ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1) ret i8 %r } @@ -31,7 +31,7 @@ define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16(<8 x i16> %s1) { ; CHECK-NEXT: movt r0, #65535 ; CHECK-NEXT: vmaxv.s16 r0, q0 ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1) ret i16 %r } @@ -41,7 +41,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32(<4 x i32> %s1) { ; CHECK-NEXT: mov.w r0, #-2147483648 ; CHECK-NEXT: vmaxv.s32 r0, q0 ; CHECK-NEXT: bx lr - %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %s1) + %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %s1) ret i32 %r } @@ -51,7 +51,7 @@ define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8(<16 x i8> %s1) { ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: vmaxv.u8 r0, q0 ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1) ret i8 %r } @@ -61,7 +61,7 @@ define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16(<8 x i16> %s1) { ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: vmaxv.u16 r0, q0 ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1) ret i16 %r } @@ -71,7 +71,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32(<4 x i32> %s1) { ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: vmaxv.u32 r0, q0 ; CHECK-NEXT: bx lr - %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %s1) + %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %s1) ret i32 %r } @@ -81,7 +81,7 @@ define arm_aapcs_vfpcc i8 @vminv_s_v16i8(<16 x i8> %s1) { ; CHECK-NEXT: movs r0, #127 ; CHECK-NEXT: vminv.s8 r0, q0 ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1) ret i8 %r } @@ -91,7 +91,7 @@ define arm_aapcs_vfpcc i16 @vminv_s_v8i16(<8 x i16> %s1) { ; CHECK-NEXT: movw r0, #32767 ; CHECK-NEXT: vminv.s16 r0, q0 ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1) ret i16 %r } @@ -101,7 +101,7 @@ define arm_aapcs_vfpcc i32 @vminv_s_v4i32(<4 x i32> %s1) { ; CHECK-NEXT: mvn r0, #-2147483648 ; CHECK-NEXT: vminv.s32 r0, q0 ; CHECK-NEXT: bx lr - %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %s1) + %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %s1) ret i32 %r } @@ -111,7 +111,7 @@ define arm_aapcs_vfpcc i8 @vminv_u_v16i8(<16 x i8> %s1) { ; CHECK-NEXT: movs r0, #255 ; CHECK-NEXT: vminv.u8 r0, q0 ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1) ret i8 %r } @@ -121,7 +121,7 @@ define arm_aapcs_vfpcc i16 @vminv_u_v8i16(<8 x i16> %s1) { ; CHECK-NEXT: movw r0, #65535 ; CHECK-NEXT: vminv.u16 r0, q0 ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1) ret i16 %r } @@ -131,7 +131,7 @@ define arm_aapcs_vfpcc i32 @vminv_u_v4i32(<4 x i32> %s1) { ; CHECK-NEXT: mov.w r0, #-1 ; CHECK-NEXT: vminv.u32 r0, q0 ; CHECK-NEXT: bx lr - %r = call 
i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %s1) + %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %s1) ret i32 %r } @@ -142,7 +142,7 @@ define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.s8 r0, q0 ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1) %c = icmp sgt i8 %r, %s2 %s = select i1 %c, i8 %r, i8 %s2 ret i8 %s @@ -157,7 +157,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) { ; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1) %rs = sext i8 %r to i32 %c = icmp sgt i32 %rs, %s2 %s = select i1 %c, i32 %rs, i32 %s2 @@ -169,7 +169,7 @@ define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.s16 r0, q0 ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1) %c = icmp sgt i16 %r, %s2 %s = select i1 %c, i16 %r, i16 %s2 ret i16 %s @@ -185,7 +185,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) { ; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1) %rs = sext i16 %r to i32 %c = icmp sgt i32 %rs, %s2 %s = select i1 %c, i32 %rs, i32 %s2 @@ -197,7 +197,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.s32 r0, q0 ; CHECK-NEXT: bx lr - %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %s1) + %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %s1) %c = icmp sgt i32 %r, %s2 %s = select i1 %c, i32 %r, i32 %s2 ret i32 %s @@ -208,7 +208,7 @@ define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.u8 r0, q0 ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1) %c = icmp ugt i8 %r, %s2 %s = select i1 %c, i8 %r, i8 %s2 ret i8 %s @@ -223,7 +223,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) { ; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1) %rs = zext i8 %r to i32 %c = icmp ugt i32 %rs, %s2 %s = select i1 %c, i32 %rs, i32 %s2 @@ -235,7 +235,7 @@ define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.u16 r0, q0 ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1) %c = icmp ugt i16 %r, %s2 %s = select i1 %c, i16 %r, i16 %s2 ret i16 %s @@ -250,7 +250,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) { ; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1) %rs = zext i16 %r to i32 %c = icmp ugt i32 %rs, %s2 %s = select i1 %c, i32 %rs, i32 %s2 @@ 
-262,7 +262,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.u32 r0, q0 ; CHECK-NEXT: bx lr - %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %s1) + %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %s1) %c = icmp ugt i32 %r, %s2 %s = select i1 %c, i32 %r, i32 %s2 ret i32 %s @@ -273,7 +273,7 @@ define arm_aapcs_vfpcc i8 @vminv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.s8 r0, q0 ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1) %c = icmp slt i8 %r, %s2 %s = select i1 %c, i8 %r, i8 %s2 ret i8 %s @@ -288,7 +288,7 @@ define arm_aapcs_vfpcc i32 @vminv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) { ; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1) %rs = sext i8 %r to i32 %c = icmp slt i32 %rs, %s2 %s = select i1 %c, i32 %rs, i32 %s2 @@ -300,7 +300,7 @@ define arm_aapcs_vfpcc i16 @vminv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.s16 r0, q0 ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1) %c = icmp slt i16 %r, %s2 %s = select i1 %c, i16 %r, i16 %s2 ret i16 %s @@ -315,7 +315,7 @@ define arm_aapcs_vfpcc i32 @vminv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) { ; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1) %rs = sext i16 %r to i32 %c = icmp slt i32 %rs, %s2 %s = select i1 %c, i32 %rs, i32 %s2 @@ -327,7 +327,7 @@ define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.s32 r0, q0 ; CHECK-NEXT: bx lr - %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %s1) + %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %s1) %c = icmp slt i32 %r, %s2 %s = select i1 %c, i32 %r, i32 %s2 ret i32 %s @@ -338,7 +338,7 @@ define arm_aapcs_vfpcc i8 @vminv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.u8 r0, q0 ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1) %c = icmp ult i8 %r, %s2 %s = select i1 %c, i8 %r, i8 %s2 ret i8 %s @@ -353,7 +353,7 @@ define arm_aapcs_vfpcc i32 @vminv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) { ; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr - %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1) + %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1) %rs = zext i8 %r to i32 %c = icmp ult i32 %rs, %s2 %s = select i1 %c, i32 %rs, i32 %s2 @@ -365,7 +365,7 @@ define arm_aapcs_vfpcc i16 @vminv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.u16 r0, q0 ; CHECK-NEXT: bx lr - %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1) %c = icmp ult i16 %r, %s2 %s = select i1 %c, i16 %r, i16 %s2 ret i16 %s @@ -380,7 +380,7 @@ define arm_aapcs_vfpcc i32 @vminv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) { ; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr - %r 
= call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1) + %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1) %rs = zext i16 %r to i32 %c = icmp ult i32 %rs, %s2 %s = select i1 %c, i32 %rs, i32 %s2 @@ -392,7 +392,7 @@ define arm_aapcs_vfpcc i32 @vminv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.u32 r0, q0 ; CHECK-NEXT: bx lr - %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %s1) + %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %s1) %c = icmp ult i32 %r, %s2 %s = select i1 %c, i32 %r, i32 %s2 ret i32 %s diff --git a/llvm/test/CodeGen/X86/haddsub.ll b/llvm/test/CodeGen/X86/haddsub.ll index 2ea26b45e8713..496d2367445bd 100644 --- a/llvm/test/CodeGen/X86/haddsub.ll +++ b/llvm/test/CodeGen/X86/haddsub.ll @@ -1628,8 +1628,8 @@ define float @extract_extract01_v4f32_fadd_f32_uses3(<4 x float> %x, float* %p1, ; Repeat tests from general reductions to verify output for hoppy targets: ; PR38971: https://bugs.llvm.org/show_bug.cgi?id=38971 -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>) +declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>) +declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>) define float @fadd_reduce_v8f32(float %a0, <8 x float> %a1) { ; SSE3-SLOW-LABEL: fadd_reduce_v8f32: @@ -1672,7 +1672,7 @@ define float @fadd_reduce_v8f32(float %a0, <8 x float> %a1) { ; AVX-FAST-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq - %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %a0, <8 x float> %a1) + %r = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1) ret float %r } @@ -1711,7 +1711,7 @@ define double @fadd_reduce_v4f64(double %a0, <4 x double> %a1) { ; AVX-FAST-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq - %r = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double %a0, <4 x double> %a1) + %r = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1) ret double %r } diff --git a/llvm/test/CodeGen/X86/pr45378.ll b/llvm/test/CodeGen/X86/pr45378.ll index 36b2de07bcc83..36bbdd4265b1e 100644 --- a/llvm/test/CodeGen/X86/pr45378.ll +++ b/llvm/test/CodeGen/X86/pr45378.ll @@ -6,7 +6,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512BW -declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) define i1 @parseHeaders(i64 * %ptr) nounwind { ; SSE2-LABEL: parseHeaders: @@ -34,7 +34,7 @@ define i1 @parseHeaders(i64 * %ptr) nounwind { ; AVX-NEXT: retq %vptr = bitcast i64 * %ptr to <2 x i64> * %vload = load <2 x i64>, <2 x i64> * %vptr, align 8 - %vreduce = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %vload) + %vreduce = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %vload) %vcheck = icmp eq i64 %vreduce, 0 ret i1 %vcheck } diff --git a/llvm/test/CodeGen/X86/vector-reduce-add.ll b/llvm/test/CodeGen/X86/vector-reduce-add.ll index 92f97d0469cd7..3ddaf62fe1366 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-add.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-add.ll @@ -32,7 +32,7 @@ define i64 @test_v2i64(<2 x i64> %a0) { ; 
AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a0) ret i64 %1 } @@ -74,7 +74,7 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a0) ret i64 %1 } @@ -124,7 +124,7 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a0) ret i64 %1 } @@ -187,7 +187,7 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a0) ret i64 %1 } @@ -229,7 +229,7 @@ define i32 @test_v2i32(<2 x i32> %a0) { ; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a0) ret i32 %1 } @@ -276,7 +276,7 @@ define i32 @test_v4i32(<4 x i32> %a0) { ; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a0) ret i32 %1 } @@ -336,7 +336,7 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a0) ret i32 %1 } @@ -408,7 +408,7 @@ define i32 @test_v16i32(<16 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a0) ret i32 %1 } @@ -499,7 +499,7 @@ define i32 @test_v32i32(<32 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %a0) ret i32 %1 } @@ -547,7 +547,7 @@ define i16 @test_v2i16(<2 x i16> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a0) ret i16 %1 } @@ -601,7 +601,7 @@ define i16 @test_v4i16(<4 x i16> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a0) ret i16 %1 } @@ -663,7 +663,7 @@ define i16 @test_v8i16(<8 x i16> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a0) ret i16 %1 } @@ -738,7 +738,7 @@ 
define i16 @test_v16i16(<16 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a0) ret i16 %1 } @@ -826,7 +826,7 @@ define i16 @test_v32i16(<32 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %a0) ret i16 %1 } @@ -933,7 +933,7 @@ define i16 @test_v64i16(<64 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> %a0) ret i16 %1 } @@ -966,7 +966,7 @@ define i8 @test_v2i8(<2 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a0) ret i8 %1 } @@ -1002,7 +1002,7 @@ define i8 @test_v2i8_load(<2 x i8>* %p) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %a0 = load <2 x i8>, <2 x i8>* %p - %1 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a0) ret i8 %1 } @@ -1043,7 +1043,7 @@ define i8 @test_v4i8(<4 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a0) ret i8 %1 } @@ -1075,7 +1075,7 @@ define i8 @test_v4i8_load(<4 x i8>* %p) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %a0 = load <4 x i8>, <4 x i8>* %p - %1 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a0) ret i8 %1 } @@ -1103,7 +1103,7 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a0) ret i8 %1 } @@ -1135,7 +1135,7 @@ define i8 @test_v8i8_load(<8 x i8>* %p) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %a0 = load <8 x i8>, <8 x i8>* %p - %1 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a0) ret i8 %1 } @@ -1169,7 +1169,7 @@ define i8 @test_v16i8(<16 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a0) ret i8 %1 } @@ -1223,7 +1223,7 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a0) ret i8 %1 } @@ -1285,7 +1285,7 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: 
retq - %1 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %a0) ret i8 %1 } @@ -1360,32 +1360,32 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %a0) ret i8 %1 } -declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll index 3c5e46ec96438..e6b895692cc0d 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll @@ -59,7 +59,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX512VL-NEXT: sete %al ; AVX512VL-NEXT: retq %a = trunc <2 x i64> %0 to <2 x i1> - %b = call i1 
@llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a) ret i1 %b } @@ -111,7 +111,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX512VL-NEXT: sete %al ; AVX512VL-NEXT: retq %a = trunc <4 x i32> %0 to <4 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a) ret i1 %b } @@ -176,7 +176,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; AVX512VL-NEXT: sete %al ; AVX512VL-NEXT: retq %a = trunc <8 x i8> %0 to <8 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a) ret i1 %b } @@ -205,7 +205,7 @@ define i1 @trunc_v16i8_v16i1(<16 x i8>) { ; AVX512-NEXT: sete %al ; AVX512-NEXT: retq %a = trunc <16 x i8> %0 to <16 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a) ret i1 %b } @@ -262,7 +262,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <4 x i64> %0 to <4 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a) ret i1 %b } @@ -351,7 +351,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <8 x i32> %0 to <8 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a) ret i1 %b } @@ -420,7 +420,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <16 x i16> %0 to <16 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a) ret i1 %b } @@ -492,7 +492,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <32 x i8> %0 to <32 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a) ret i1 %b } @@ -597,7 +597,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <8 x i64> %0 to <8 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a) ret i1 %b } @@ -678,7 +678,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) { ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %a = trunc <16 x i32> %0 to <16 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a) ret i1 %b } @@ -765,7 +765,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <32 x i16> %0 to <32 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a) ret i1 %b } @@ -845,7 +845,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <64 x i8> %0 to <64 x i1> - %b = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a) ret i1 %b } @@ -905,7 +905,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX512VL-NEXT: sete %al ; AVX512VL-NEXT: retq %a = icmp eq <2 x i64> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a) + %b = call i1 
@llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a) + %b = call i1
@llvm.vector.reduce.and.v2i1(<2 x i1> %a) ret i1 %b } @@ -961,7 +961,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX512VL-NEXT: sete %al ; AVX512VL-NEXT: retq %a = icmp eq <4 x i32> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a) ret i1 %b } @@ -1014,7 +1014,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; AVX512VL-NEXT: sete %al ; AVX512VL-NEXT: retq %a = icmp eq <8 x i8> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a) ret i1 %b } @@ -1062,7 +1062,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) { ; AVX512VL-NEXT: setb %al ; AVX512VL-NEXT: retq %a = icmp eq <16 x i8> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a) ret i1 %b } @@ -1141,7 +1141,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <4 x i64> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a) ret i1 %b } @@ -1207,7 +1207,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <8 x i32> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a) ret i1 %b } @@ -1274,7 +1274,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <16 x i16> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a) ret i1 %b } @@ -1354,7 +1354,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <32 x i8> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a) ret i1 %b } @@ -1447,7 +1447,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <8 x i64> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a) ret i1 %b } @@ -1507,7 +1507,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) { ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %a = icmp eq <16 x i32> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a) ret i1 %b } @@ -1595,7 +1595,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <32 x i16> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a) ret i1 %b } @@ -1686,13 +1686,13 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <64 x i8> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> %a) + %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a) ret i1 %b } -declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1>) -declare i1 
@llvm.experimental.vector.reduce.and.v16i1(<16 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1>) +declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>) +declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>) +declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>) +declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>) +declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>) +declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll index 0df3238e6a2dc..64066be4cd2a7 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll @@ -28,7 +28,7 @@ define i1 @test_v2i64(<2 x i64> %a0) { ; AVX-NEXT: testq %rax, %rax ; AVX-NEXT: sete %al ; AVX-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a0) %2 = icmp eq i64 %1, 0 ret i1 %2 } @@ -79,7 +79,7 @@ define i1 @test_v4i64(<4 x i64> %a0) { ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %a0) %2 = icmp ne i64 %1, 0 ret i1 %2 } @@ -136,7 +136,7 @@ define i1 @test_v8i64(<8 x i64> %a0) { ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %a0) %2 = icmp eq i64 %1, 0 ret i1 %2 } @@ -202,7 +202,7 @@ define i1 @test_v16i64(<16 x i64> %a0) { ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %a0) %2 = icmp ne i64 %1, 0 ret i1 %2 } @@ -229,7 +229,7 @@ define i1 @test_v2i32(<2 x i32> %a0) { ; AVX-NEXT: testl %eax, %eax ; AVX-NEXT: sete %al ; AVX-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a0) %2 = icmp eq i32 %1, 0 ret i1 %2 } @@ -256,7 +256,7 @@ define i1 @test_v4i32(<4 x i32> %a0) { ; AVX-NEXT: testl %eax, %eax ; AVX-NEXT: setne %al ; AVX-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a0) %2 = icmp ne i32 %1, 0 ret i1 %2 } @@ -315,7 +315,7 @@ define i1 @test_v8i32(<8 x i32> %a0) { ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a0) %2 = icmp eq i32 %1, 0 ret i1 %2 } @@ -380,7 +380,7 @@ define i1 @test_v16i32(<16 x i32> %a0) { ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a0) %2 = icmp ne i32 %1, 0 ret i1 %2 } @@ -454,7 +454,7 @@ define i1 @test_v32i32(<32 x i32> %a0) { ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> %a0) %2 = icmp eq i32 %1, 0 ret i1 %2 } @@ -482,7 +482,7 @@ define i1 @test_v2i16(<2 x i16> %a0) { ; AVX-NEXT: testw %ax, %ax ; AVX-NEXT: 
sete %al ; AVX-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a0) %2 = icmp eq i16 %1, 0 ret i1 %2 } @@ -510,7 +510,7 @@ define i1 @test_v4i16(<4 x i16> %a0) { ; AVX-NEXT: testw %ax, %ax ; AVX-NEXT: setne %al ; AVX-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a0) %2 = icmp ne i16 %1, 0 ret i1 %2 } @@ -542,7 +542,7 @@ define i1 @test_v8i16(<8 x i16> %a0) { ; AVX-NEXT: testw %ax, %ax ; AVX-NEXT: sete %al ; AVX-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a0) %2 = icmp eq i16 %1, 0 ret i1 %2 } @@ -610,7 +610,7 @@ define i1 @test_v16i16(<16 x i16> %a0) { ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %a0) %2 = icmp ne i16 %1, 0 ret i1 %2 } @@ -684,7 +684,7 @@ define i1 @test_v32i16(<32 x i16> %a0) { ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> %a0) %2 = icmp eq i16 %1, 0 ret i1 %2 } @@ -767,7 +767,7 @@ define i1 @test_v64i16(<64 x i16> %a0) { ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> %a0) %2 = icmp ne i16 %1, 0 ret i1 %2 } @@ -795,7 +795,7 @@ define i1 @test_v2i8(<2 x i8> %a0) { ; AVX-NEXT: testb %al, %al ; AVX-NEXT: sete %al ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> %a0) %2 = icmp eq i8 %1, 0 ret i1 %2 } @@ -824,7 +824,7 @@ define i1 @test_v4i8(<4 x i8> %a0) { ; AVX-NEXT: testb %al, %al ; AVX-NEXT: setne %al ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a0) %2 = icmp ne i8 %1, 0 ret i1 %2 } @@ -857,7 +857,7 @@ define i1 @test_v8i8(<8 x i8> %a0) { ; AVX-NEXT: testb %al, %al ; AVX-NEXT: sete %al ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a0) %2 = icmp eq i8 %1, 0 ret i1 %2 } @@ -894,7 +894,7 @@ define i1 @test_v16i8(<16 x i8> %a0) { ; AVX-NEXT: testb %al, %al ; AVX-NEXT: setne %al ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a0) %2 = icmp ne i8 %1, 0 ret i1 %2 } @@ -971,7 +971,7 @@ define i1 @test_v32i8(<32 x i8> %a0) { ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a0) %2 = icmp eq i8 %1, 0 ret i1 %2 } @@ -1054,7 +1054,7 @@ define i1 @test_v64i8(<64 x i8> %a0) { ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> %a0) %2 = icmp ne i8 %1, 0 ret i1 %2 } @@ -1146,33 +1146,33 @@ define i1 @test_v128i8(<128 x i8> %a0) { ; AVX512-NEXT: sete %al ; AVX512-NEXT: 
vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> %a0) %2 = icmp eq i8 %1, 0 ret i1 %2 } -declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-and.ll b/llvm/test/CodeGen/X86/vector-reduce-and.ll index 9545d29cee588..8f7b02fbd1002 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-and.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and.ll @@ -24,7 +24,7 @@ define i64 @test_v2i64(<2 x i64> %a0) { ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovq %xmm0, %rax ; AVX-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a0) ret i64 %1 } @@ -66,7 +66,7 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 
@llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %a0) ret i64 %1 } @@ -114,7 +114,7 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %a0) ret i64 %1 } @@ -171,7 +171,7 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %a0) ret i64 %1 } @@ -193,7 +193,7 @@ define i32 @test_v2i32(<2 x i32> %a0) { ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a0) ret i32 %1 } @@ -215,7 +215,7 @@ define i32 @test_v4i32(<4 x i32> %a0) { ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a0) ret i32 %1 } @@ -265,7 +265,7 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a0) ret i32 %1 } @@ -321,7 +321,7 @@ define i32 @test_v16i32(<16 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a0) ret i32 %1 } @@ -386,7 +386,7 @@ define i32 @test_v32i32(<32 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> %a0) ret i32 %1 } @@ -411,7 +411,7 @@ define i16 @test_v2i16(<2 x i16> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a0) ret i16 %1 } @@ -436,7 +436,7 @@ define i16 @test_v4i16(<4 x i16> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a0) ret i16 %1 } @@ -465,7 +465,7 @@ define i16 @test_v8i16(<8 x i16> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a0) ret i16 %1 } @@ -528,7 +528,7 @@ define i16 @test_v16i16(<16 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %a0) ret i16 %1 } @@ -597,7 +597,7 @@ define i16 @test_v32i16(<32 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; 
AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> %a0) ret i16 %1 } @@ -675,7 +675,7 @@ define i16 @test_v64i16(<64 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> %a0) ret i16 %1 } @@ -700,7 +700,7 @@ define i8 @test_v2i8(<2 x i8> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> %a0) ret i8 %1 } @@ -726,7 +726,7 @@ define i8 @test_v4i8(<4 x i8> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a0) ret i8 %1 } @@ -756,7 +756,7 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a0) ret i8 %1 } @@ -790,7 +790,7 @@ define i8 @test_v16i8(<16 x i8> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a0) ret i8 %1 } @@ -862,7 +862,7 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a0) ret i8 %1 } @@ -940,7 +940,7 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> %a0) ret i8 %1 } @@ -1027,32 +1027,32 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> %a0) ret i8 %1 } -declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>) +declare i32 
@llvm.vector.reduce.and.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll index e98833e3b4c21..ab913980cbe89 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll @@ -53,7 +53,7 @@ define float @test_v2f32(float %a0, <2 x float> %a1) { ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %a0, <2 x float> %a1) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1) ret float %1 } @@ -112,7 +112,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) { ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %a1) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1) ret float %1 } @@ -185,7 +185,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) { ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %a0, <8 x float> %a1) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1) ret float %1 } @@ -268,7 +268,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float %a0, <16 x float> %a1) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1) ret float %1 } @@ -313,7 +313,7 @@ define float @test_v2f32_zero(<2 x float> %a0) { ; 
AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float 0.0, <2 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0) ret float %1 } @@ -367,7 +367,7 @@ define float @test_v4f32_zero(<4 x float> %a0) { ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0) ret float %1 } @@ -435,7 +435,7 @@ define float @test_v8f32_zero(<8 x float> %a0) { ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0) ret float %1 } @@ -513,7 +513,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0) ret float %1 } @@ -558,7 +558,7 @@ define float @test_v2f32_undef(<2 x float> %a0) { ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float 0.0, <2 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0) ret float %1 } @@ -612,7 +612,7 @@ define float @test_v4f32_undef(<4 x float> %a0) { ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0) ret float %1 } @@ -680,7 +680,7 @@ define float @test_v8f32_undef(<8 x float> %a0) { ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0) ret float %1 } @@ -758,7 +758,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0) ret float %1 } @@ -801,7 +801,7 @@ define double @test_v2f64(double %a0, <2 x double> %a1) { ; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double %a0, <2 x double> %a1) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1) ret double %1 } @@ -853,7 +853,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double 
@llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double %a0, <4 x double> %a1) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1) ret double %1 } @@ -912,7 +912,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double %a0, <8 x double> %a1) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1) ret double %1 } @@ -982,7 +982,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double %a0, <16 x double> %a1) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1) ret double %1 } @@ -1021,7 +1021,7 @@ define double @test_v2f64_zero(<2 x double> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double 0.0, <2 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0) ret double %1 } @@ -1069,7 +1069,7 @@ define double @test_v4f64_zero(<4 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.0, <4 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0) ret double %1 } @@ -1124,7 +1124,7 @@ define double @test_v8f64_zero(<8 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double 0.0, <8 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0) ret double %1 } @@ -1189,7 +1189,7 @@ define double @test_v16f64_zero(<16 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double 0.0, <16 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0) ret double %1 } @@ -1228,7 +1228,7 @@ define double @test_v2f64_undef(<2 x double> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double 0.0, <2 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0) ret double %1 } @@ -1276,7 +1276,7 @@ define double @test_v4f64_undef(<4 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.0, <4 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0) ret double %1 } @@ -1331,7 +1331,7 @@ define double @test_v8f64_undef(<8 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double 0.0, <8 x double> %a0) + %1 = call fast double 
@llvm.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0) ret double %1 } @@ -1396,16 +1396,16 @@ define double @test_v16f64_undef(<16 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double 0.0, <16 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0) ret double %1 } -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float, <2 x float>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>) +declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>) +declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>) +declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double, <8 x double>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double, <16 x double>) +declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>) +declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>) +declare double @llvm.vector.reduce.fadd.f64.v8f64(double, <8 x double>) +declare double @llvm.vector.reduce.fadd.f64.v16f64(double, <16 x double>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll index 7de6a25029e7b..ebcf1d38e23b9 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll @@ -39,7 +39,7 @@ define float @test_v2f32(float %a0, <2 x float> %a1) { ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3] ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %a0, <2 x float> %a1) + %1 = call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1) ret float %1 } @@ -90,7 +90,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) { ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3] ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %a1) + %1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1) ret float %1 } @@ -176,7 +176,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) { ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %a0, <8 x float> %a1) + %1 = call float @llvm.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1) ret float %1 } @@ -327,7 +327,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float %a0, <16 x float> %a1) + %1 = call float @llvm.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1) ret float %1 } @@ -367,7 +367,7 @@ define float @test_v2f32_zero(<2 x float> %a0) { ; AVX512-NEXT: 
vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float 0.0, <2 x float> %a0) + %1 = call float @llvm.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0) ret float %1 } @@ -422,7 +422,7 @@ define float @test_v4f32_zero(<4 x float> %a0) { ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a0) + %1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0) ret float %1 } @@ -512,7 +512,7 @@ define float @test_v8f32_zero(<8 x float> %a0) { ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %a0) + %1 = call float @llvm.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0) ret float %1 } @@ -667,7 +667,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a0) + %1 = call float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0) ret float %1 } @@ -699,7 +699,7 @@ define float @test_v2f32_undef(<2 x float> %a0) { ; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float undef, <2 x float> %a0) + %1 = call float @llvm.vector.reduce.fadd.f32.v2f32(float undef, <2 x float> %a0) ret float %1 } @@ -746,7 +746,7 @@ define float @test_v4f32_undef(<4 x float> %a0) { ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float undef, <4 x float> %a0) + %1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %a0) ret float %1 } @@ -828,7 +828,7 @@ define float @test_v8f32_undef(<8 x float> %a0) { ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float undef, <8 x float> %a0) + %1 = call float @llvm.vector.reduce.fadd.f32.v8f32(float undef, <8 x float> %a0) ret float %1 } @@ -975,7 +975,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float undef, <16 x float> %a0) + %1 = call float @llvm.vector.reduce.fadd.f32.v16f32(float undef, <16 x float> %a0) ret float %1 } @@ -1004,7 +1004,7 @@ define double @test_v2f64(double %a0, <2 x double> %a1) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double %a0, <2 x double> %a1) + %1 = call double @llvm.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1) ret double %1 } @@ -1042,7 +1042,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double %a0, <4 x double> %a1) + %1 = call double 
@llvm.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1) ret double %1 } @@ -1101,7 +1101,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double %a0, <8 x double> %a1) + %1 = call double @llvm.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1) ret double %1 } @@ -1229,7 +1229,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double %a0, <16 x double> %a1) + %1 = call double @llvm.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1) ret double %1 } @@ -1261,7 +1261,7 @@ define double @test_v2f64_zero(<2 x double> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double 0.0, <2 x double> %a0) + %1 = call double @llvm.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0) ret double %1 } @@ -1302,7 +1302,7 @@ define double @test_v4f64_zero(<4 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.0, <4 x double> %a0) + %1 = call double @llvm.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0) ret double %1 } @@ -1364,7 +1364,7 @@ define double @test_v8f64_zero(<8 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double 0.0, <8 x double> %a0) + %1 = call double @llvm.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0) ret double %1 } @@ -1467,7 +1467,7 @@ define double @test_v16f64_zero(<16 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double 0.0, <16 x double> %a0) + %1 = call double @llvm.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0) ret double %1 } @@ -1493,7 +1493,7 @@ define double @test_v2f64_undef(<2 x double> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double undef, <2 x double> %a0) + %1 = call double @llvm.vector.reduce.fadd.f64.v2f64(double undef, <2 x double> %a0) ret double %1 } @@ -1528,7 +1528,7 @@ define double @test_v4f64_undef(<4 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double undef, <4 x double> %a0) + %1 = call double @llvm.vector.reduce.fadd.f64.v4f64(double undef, <4 x double> %a0) ret double %1 } @@ -1584,7 +1584,7 @@ define double @test_v8f64_undef(<8 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double undef, <8 x double> %a0) + %1 = call double @llvm.vector.reduce.fadd.f64.v8f64(double undef, <8 x double> %a0) ret double %1 } @@ -1681,16 +1681,16 @@ define double @test_v16f64_undef(<16 x double> %a0) { ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; 
AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double undef, <16 x double> %a0) + %1 = call double @llvm.vector.reduce.fadd.f64.v16f64(double undef, <16 x double> %a0) ret double %1 } -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float, <2 x float>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>) -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>) +declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>) +declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>) +declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double, <8 x double>) -declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double, <16 x double>) +declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>) +declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>) +declare double @llvm.vector.reduce.fadd.f64.v8f64(double, <8 x double>) +declare double @llvm.vector.reduce.fadd.f64.v16f64(double, <16 x double>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll index 50b88c2c55f5c..01b41c7ed7a7f 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll @@ -39,7 +39,7 @@ define float @test_v2f32(<2 x float> %a0) { ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a0) ret float %1 } @@ -78,7 +78,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a0) ret float %1 } @@ -125,7 +125,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0) ret float %1 } @@ -179,7 +179,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0) + %1 = call fast float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a0) ret float %1 } @@ -206,7 +206,7 @@ define double @test_v2f64(<2 x double> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0) ret double %1 } @@ -236,7 +236,7 @@ define double 
@test_v4f64(<4 x double> %a0) { ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a0) ret double %1 } @@ -271,7 +271,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0) ret double %1 } @@ -313,16 +313,16 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call fast double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0) + %1 = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a0) ret double %1 } -declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>) +declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) +declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>) -declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>) +declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) +declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>) +declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll index f4539c572375a..091990a90fbb8 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -35,7 +35,7 @@ define float @test_v2f32(<2 x float> %a0) { ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a0) + %1 = call nnan float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a0) ret float %1 } @@ -84,7 +84,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX512-NEXT: vmaxss %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0) + %1 = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a0) ret float %1 } @@ -155,7 +155,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-NEXT: vmaxss %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %a0) + %1 = call nnan float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a0) ret float %1 } @@ -247,7 +247,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: vmaxss %xmm8, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0) + %1 = 
call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a0) ret float %1 } @@ -274,7 +274,7 @@ define double @test_v2f64(<2 x double> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a0) + %1 = call nnan double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a0) ret double %1 } @@ -316,7 +316,7 @@ define double @test_v3f64(<3 x double> %a0) { ; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v3f64(<3 x double> %a0) + %1 = call nnan double @llvm.vector.reduce.fmax.v3f64(<3 x double> %a0) ret double %1 } @@ -350,7 +350,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-NEXT: vmaxsd %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0) + %1 = call nnan double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a0) ret double %1 } @@ -395,7 +395,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vmaxsd %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %a0) + %1 = call nnan double @llvm.vector.reduce.fmax.v8f64(<8 x double> %a0) ret double %1 } @@ -447,7 +447,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0) + %1 = call nnan double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a0) ret double %1 } @@ -511,18 +511,18 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call nnan half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half> %a0) + %1 = call nnan half @llvm.vector.reduce.fmax.v2f16(<2 x half> %a0) ret half %1 } -declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>) +declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) +declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>) -declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v3f64(<3 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>) +declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmax.v3f64(<3 x double>) +declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) +declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>) +declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>) -declare half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half>) +declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll index 
c5e025be5423a..af21b7338f991 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll @@ -14,7 +14,7 @@ define float @test_v1f32(<1 x float> %a0) { ; ALL-LABEL: test_v1f32: ; ALL: # %bb.0: ; ALL-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a0) + %1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a0) ret float %1 } @@ -62,7 +62,7 @@ define float @test_v2f32(<2 x float> %a0) { ; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a0) + %1 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a0) ret float %1 } @@ -133,7 +133,7 @@ define float @test_v3f32(<3 x float> %a0) { ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a0) + %1 = call float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a0) ret float %1 } @@ -230,7 +230,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0) + %1 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a0) ret float %1 } @@ -401,7 +401,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512VL-NEXT: vmovss %xmm8, %xmm0, %xmm0 {%k1} ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %a0) + %1 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a0) ret float %1 } @@ -661,7 +661,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512VL-NEXT: vmovss %xmm8, %xmm0, %xmm0 {%k1} ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0) + %1 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a0) ret float %1 } @@ -700,7 +700,7 @@ define double @test_v2f64(<2 x double> %a0) { ; AVX512-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovapd %xmm1, %xmm0 ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a0) + %1 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a0) ret double %1 } @@ -774,7 +774,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0) + %1 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a0) ret double %1 } @@ -922,7 +922,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512VL-NEXT: vmovsd %xmm8, %xmm0, %xmm0 {%k1} ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %a0) + %1 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %a0) ret double %1 } @@ -1091,18 +1091,18 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0) + %1 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a0) ret double %1 } -declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>) -declare float 
@llvm.experimental.vector.reduce.fmax.v3f32(<3 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>) -declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>) +declare float @llvm.vector.reduce.fmax.v1f32(<1 x float>) +declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmax.v3f32(<3 x float>) +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) +declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>) -declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>) -declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>) +declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) +declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>) +declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll index 5846f588581d0..de7c67c5ca59b 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -14,7 +14,7 @@ define float @test_v1f32(<1 x float> %a0) { ; ALL-LABEL: test_v1f32: ; ALL: # %bb.0: ; ALL-NEXT: retq - %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a0) + %1 = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a0) ret float %1 } @@ -43,7 +43,7 @@ define float @test_v2f32(<2 x float> %a0) { ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0) + %1 = call nnan float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a0) ret float %1 } @@ -84,7 +84,7 @@ define float @test_v3f32(<3 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq - %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a0) + %1 = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a0) ret float %1 } @@ -133,7 +133,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX512-NEXT: vminss %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a0) + %1 = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a0) ret float %1 } @@ -204,7 +204,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-NEXT: vminss %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0) + %1 = call nnan float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0) ret float %1 } @@ -296,7 +296,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: vminss %xmm8, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a0) + %1 = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a0) ret float %1 } @@ -323,7 +323,7 @@ define double @test_v2f64(<2 x double> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminsd 
%xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a0) + %1 = call nnan double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0) ret double %1 } @@ -357,7 +357,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-NEXT: vminsd %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %a0) + %1 = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a0) ret double %1 } @@ -402,7 +402,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vminsd %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0) + %1 = call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0) ret double %1 } @@ -454,7 +454,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %a0) + %1 = call nnan double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a0) ret double %1 } @@ -518,20 +518,20 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call nnan half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half> %a0) + %1 = call nnan half @llvm.vector.reduce.fmin.v2f16(<2 x half> %a0) ret half %1 } -declare float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>) +declare float @llvm.vector.reduce.fmin.v1f32(<1 x float>) +declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmin.v3f32(<3 x float>) +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) +declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>) -declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>) +declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) +declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>) +declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>) -declare half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half>) +declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll index 1d7436eaa8a44..10e58428d3750 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll @@ -54,7 +54,7 @@ define float @test_v2f32(<2 x float> %a0) { ; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0) + %1 = call float 
@llvm.vector.reduce.fmin.v2f32(<2 x float> %a0) ret float %1 } @@ -151,7 +151,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a0) + %1 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a0) ret float %1 } @@ -322,7 +322,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512VL-NEXT: vmovss %xmm8, %xmm0, %xmm0 {%k1} ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0) + %1 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0) ret float %1 } @@ -582,7 +582,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512VL-NEXT: vmovss %xmm8, %xmm0, %xmm0 {%k1} ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a0) + %1 = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a0) ret float %1 } @@ -621,7 +621,7 @@ define double @test_v2f64(<2 x double> %a0) { ; AVX512-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovapd %xmm1, %xmm0 ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a0) + %1 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0) ret double %1 } @@ -691,7 +691,7 @@ define double @test_v3f64(<3 x double> %a0) { ; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.fmin.v3f64(<3 x double> %a0) + %1 = call double @llvm.vector.reduce.fmin.v3f64(<3 x double> %a0) ret double %1 } @@ -765,7 +765,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %a0) + %1 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a0) ret double %1 } @@ -913,7 +913,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512VL-NEXT: vmovsd %xmm8, %xmm0, %xmm0 {%k1} ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0) + %1 = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0) ret double %1 } @@ -1082,17 +1082,17 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %a0) + %1 = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a0) ret double %1 } -declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>) -declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>) +declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) +declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>) -declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v3f64(<3 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>) -declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>) -declare double 
@llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v3f64(<3 x double>)
+declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
+declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
index e99946edeee8a..7840ae8ceed61 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
@@ -39,7 +39,7 @@ define float @test_v2f32(float %a0, <2 x float> %a1) {
 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float %a0, <2 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float %a0, <2 x float> %a1)
   ret float %1
 }
 
@@ -82,7 +82,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %a0, <4 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %a0, <4 x float> %a1)
   ret float %1
 }
 
@@ -133,7 +133,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float %a0, <8 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float %a0, <8 x float> %a1)
   ret float %1
 }
 
@@ -191,7 +191,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float %a0, <16 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v16f32(float %a0, <16 x float> %a1)
   ret float %1
 }
 
@@ -225,7 +225,7 @@ define float @test_v2f32_zero(<2 x float> %a0) {
 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
   ret float %1
 }
 
@@ -265,7 +265,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
   ret float %1
 }
 
@@ -313,7 +313,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
   ret float %1
 }
 
@@ -368,7 +368,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
   ret float %1
 }
 
@@ -402,7 +402,7 @@ define float @test_v2f32_undef(<2 x float> %a0) {
 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
   ret float %1
 }
 
@@ -442,7 +442,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
   ret float %1
 }
 
@@ -490,7 +490,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
   ret float %1
 }
 
@@ -545,7 +545,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
   ret float %1
 }
 
@@ -575,7 +575,7 @@ define double @test_v2f64(double %a0, <2 x double> %a1) {
 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double %a0, <2 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double %a0, <2 x double> %a1)
   ret double %1
 }
 
@@ -608,7 +608,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double %a0, <4 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double %a0, <4 x double> %a1)
   ret double %1
 }
 
@@ -646,7 +646,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double %a0, <8 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v8f64(double %a0, <8 x double> %a1)
   ret double %1
 }
 
@@ -691,7 +691,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double %a0, <16 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v16f64(double %a0, <16 x double> %a1)
   ret double %1
 }
 
@@ -719,7 +719,7 @@ define double @test_v2f64_zero(<2 x double> %a0) {
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
   ret double %1
 }
 
@@ -750,7 +750,7 @@ define double @test_v4f64_zero(<4 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
   ret double %1
 }
 
@@ -786,7 +786,7 @@ define double @test_v8f64_zero(<8 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
   ret double %1
 }
 
@@ -828,7 +828,7 @@ define double @test_v16f64_zero(<16 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
   ret double %1
 }
 
@@ -856,7 +856,7 @@ define double @test_v2f64_undef(<2 x double> %a0) {
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
   ret double %1
 }
 
@@ -887,7 +887,7 @@ define double @test_v4f64_undef(<4 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
   ret double %1
 }
 
@@ -923,7 +923,7 @@ define double @test_v8f64_undef(<8 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
   ret double %1
 }
 
@@ -965,16 +965,16 @@ define double @test_v16f64_undef(<16 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
   ret double %1
 }
 
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
 
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double, <16 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v4f64(double, <4 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v8f64(double, <8 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v16f64(double, <16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmul.ll b/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
index 7a1c044a0042d..3693ef07aecf1 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
@@ -38,7 +38,7 @@ define float @test_v2f32(float %a0, <2 x float> %a1) {
 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float %a0, <2 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v2f32(float %a0, <2 x float> %a1)
   ret float %1
 }
 
@@ -89,7 +89,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %a0, <4 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v4f32(float %a0, <4 x float> %a1)
   ret float %1
 }
 
@@ -175,7 +175,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float %a0, <8 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v8f32(float %a0, <8 x float> %a1)
   ret float %1
 }
 
@@ -326,7 +326,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float %a0, <16 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v16f32(float %a0, <16 x float> %a1)
   ret float %1
 }
 
@@ -360,7 +360,7 @@ define float @test_v2f32_one(<2 x float> %a0) {
 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
   ret float %1
 }
 
@@ -407,7 +407,7 @@ define float @test_v4f32_one(<4 x float> %a0) {
 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
   ret float %1
 }
 
@@ -489,7 +489,7 @@ define float @test_v8f32_one(<8 x float> %a0) {
 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
   ret float %1
 }
 
@@ -636,7 +636,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
   ret float %1
 }
 
@@ -668,7 +668,7 @@ define float @test_v2f32_undef(<2 x float> %a0) {
 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float undef, <2 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v2f32(float undef, <2 x float> %a0)
   ret float %1
 }
 
@@ -715,7 +715,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float undef, <4 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %a0)
   ret float %1
 }
 
@@ -797,7 +797,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float undef, <8 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v8f32(float undef, <8 x float> %a0)
   ret float %1
 }
 
@@ -944,7 +944,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float undef, <16 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v16f32(float undef, <16 x float> %a0)
   ret float %1
 }
 
@@ -973,7 +973,7 @@ define double @test_v2f64(double %a0, <2 x double> %a1) {
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double %a0, <2 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v2f64(double %a0, <2 x double> %a1)
   ret double %1
 }
 
@@ -1011,7 +1011,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double %a0, <4 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v4f64(double %a0, <4 x double> %a1)
   ret double %1
 }
 
@@ -1070,7 +1070,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double %a0, <8 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v8f64(double %a0, <8 x double> %a1)
   ret double %1
 }
 
@@ -1198,7 +1198,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double %a0, <16 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v16f64(double %a0, <16 x double> %a1)
   ret double %1
 }
 
@@ -1226,7 +1226,7 @@ define double @test_v2f64_one(<2 x double> %a0) {
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
   ret double %1
 }
 
@@ -1263,7 +1263,7 @@ define double @test_v4f64_one(<4 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
   ret double %1
 }
 
@@ -1321,7 +1321,7 @@ define double @test_v8f64_one(<8 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
   ret double %1
 }
 
@@ -1419,7 +1419,7 @@ define double @test_v16f64_one(<16 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
   ret double %1
 }
 
@@ -1445,7 +1445,7 @@ define double @test_v2f64_undef(<2 x double> %a0) {
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double undef, <2 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v2f64(double undef, <2 x double> %a0)
   ret double %1
 }
 
@@ -1480,7 +1480,7 @@ define double @test_v4f64_undef(<4 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double undef, <4 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v4f64(double undef, <4 x double> %a0)
   ret double %1
 }
 
@@ -1536,7 +1536,7 @@ define double @test_v8f64_undef(<8 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double undef, <8 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v8f64(double undef, <8 x double> %a0)
   ret double %1
 }
 
@@ -1633,16 +1633,16 @@ define double @test_v16f64_undef(<16 x double> %a0) {
 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double undef, <16 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v16f64(double undef, <16 x double> %a0)
   ret double %1
 }
 
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
 
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double, <16 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v4f64(double, <4 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v8f64(double, <8 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v16f64(double, <16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index 17671c6a55918..d6cac398dfc07 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -85,7 +85,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0
 ; AVX512DQVL-NEXT: vmovq %xmm0, %rax
 ; AVX512DQVL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -231,7 +231,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512DQVL-NEXT: vmovq %xmm0, %rax
 ; AVX512DQVL-NEXT: vzeroupper
 ; AVX512DQVL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -443,7 +443,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX512DQVL-NEXT: vmovq %xmm0, %rax
 ; AVX512DQVL-NEXT: vzeroupper
 ; AVX512DQVL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -763,7 +763,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512DQVL-NEXT: vmovq %xmm0, %rax
 ; AVX512DQVL-NEXT: vzeroupper
 ; AVX512DQVL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -799,7 +799,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -841,7 +841,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -905,7 +905,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -983,7 +983,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -1086,7 +1086,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -1119,7 +1119,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -1154,7 +1154,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -1195,7 +1195,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -1258,7 +1258,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -1380,7 +1380,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512DQVL-NEXT: vzeroupper
 ; AVX512DQVL-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -1522,7 +1522,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512DQVL-NEXT: vzeroupper
 ; AVX512DQVL-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -1555,7 +1555,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -1607,7 +1607,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -1668,7 +1668,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -1842,7 +1842,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512DQVL-NEXT: vzeroupper
 ; AVX512DQVL-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -2051,7 +2051,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512DQVL-NEXT: vzeroupper
 ; AVX512DQVL-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -2325,7 +2325,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512DQVL-NEXT: vzeroupper
 ; AVX512DQVL-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -2685,32 +2685,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512DQVL-NEXT: vzeroupper
 ; AVX512DQVL-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.mul.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.mul.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.mul.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.mul.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index a497c83d2785e..a5458d5264bbc 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -57,7 +57,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
 ; AVX512VL-NEXT: setne %al
 ; AVX512VL-NEXT: retq
   %a = trunc <2 x i64> %0 to <2 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
   ret i1 %b
 }
 
@@ -107,7 +107,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
 ; AVX512VL-NEXT: setne %al
 ; AVX512VL-NEXT: retq
   %a = trunc <4 x i32> %0 to <4 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -169,7 +169,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ; AVX512VL-NEXT: setne %al
 ; AVX512VL-NEXT: retq
   %a = trunc <8 x i8> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -198,7 +198,7 @@ define i1 @trunc_v16i8_v16i1(<16 x i8>) {
 ; AVX512-NEXT: setne %al
 ; AVX512-NEXT: retq
   %a = trunc <16 x i8> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -253,7 +253,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = trunc <4 x i64> %0 to <4 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -338,7 +338,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = trunc <8 x i32> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -407,7 +407,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = trunc <16 x i16> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -479,7 +479,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = trunc <32 x i8> %0 to <32 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -580,7 +580,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = trunc <8 x i64> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -661,7 +661,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
   %a = trunc <16 x i32> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -748,7 +748,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = trunc <32 x i16> %0 to <32 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -828,7 +828,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = trunc <64 x i8> %0 to <64 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %a)
   ret i1 %b
 }
 
@@ -894,7 +894,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512VL-NEXT: setne %al
 ; AVX512VL-NEXT: retq
   %a = icmp eq <2 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
   ret i1 %b
 }
 
@@ -945,7 +945,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512VL-NEXT: setne %al
 ; AVX512VL-NEXT: retq
   %a = icmp eq <4 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -998,7 +998,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512VL-NEXT: setne %al
 ; AVX512VL-NEXT: retq
   %a = icmp eq <8 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1046,7 +1046,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512VL-NEXT: setne %al
 ; AVX512VL-NEXT: retq
   %a = icmp eq <16 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1129,7 +1129,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = icmp eq <4 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -1197,7 +1197,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = icmp eq <8 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1264,7 +1264,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = icmp eq <16 x i16> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1341,7 +1341,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = icmp eq <32 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -1441,7 +1441,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = icmp eq <8 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1499,7 +1499,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
   %a = icmp eq <16 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1587,7 +1587,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = icmp eq <32 x i16> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -1680,13 +1680,13 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
   %a = icmp eq <64 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %a)
   ret i1 %b
 }
 
-declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1>)
+declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
+declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
+declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>)
+declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1>)
+declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>)
+declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index e1253975d5a6a..9cc38d8ced0ba 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -31,7 +31,7 @@ define i1 @test_v2i64(<2 x i64> %a0) {
 ; AVX-NEXT: vptest %xmm0, %xmm0
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a0)
   %2 = icmp eq i64 %1, 0
   ret i1 %2
 }
@@ -60,7 +60,7 @@ define i1 @test_v4i64(<4 x i64> %a0) {
 ; AVX-NEXT: setne %al
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a0)
   %2 = icmp ne i64 %1, 0
   ret i1 %2
 }
@@ -111,7 +111,7 @@ define i1 @test_v8i64(<8 x i64> %a0) {
 ; AVX512-NEXT: sete %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a0)
   %2 = icmp eq i64 %1, 0
   ret i1 %2
 }
@@ -175,7 +175,7 @@ define i1 @test_v16i64(<16 x i64> %a0) {
 ; AVX512-NEXT: setne %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %a0)
   %2 = icmp ne i64 %1, 0
   ret i1 %2
 }
@@ -198,7 +198,7 @@ define i1 @test_v2i32(<2 x i32> %a0) {
 ; AVX-NEXT: testq %rax, %rax
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a0)
   %2 = icmp eq i32 %1, 0
   ret i1 %2
 }
@@ -224,7 +224,7 @@ define i1 @test_v4i32(<4 x i32> %a0) {
 ; AVX-NEXT: vptest %xmm0, %xmm0
 ; AVX-NEXT: setne %al
 ; AVX-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a0)
   %2 = icmp ne i32 %1, 0
   ret i1 %2
 }
@@ -253,7 +253,7 @@ define i1 @test_v8i32(<8 x i32> %a0) {
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
   %2 = icmp eq i32 %1, 0
   ret i1 %2
 }
@@ -304,7 +304,7 @@ define i1 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT: setne %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %a0)
   %2 = icmp ne i32 %1, 0
   ret i1 %2
 }
@@ -368,7 +368,7 @@ define i1 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT: sete %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> %a0)
   %2 = icmp eq i32 %1, 0
   ret i1 %2
 }
@@ -391,7 +391,7 @@ define i1 @test_v2i16(<2 x i16> %a0) {
 ; AVX-NEXT: testl %eax, %eax
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %a0)
   %2 = icmp eq i16 %1, 0
   ret i1 %2
 }
@@ -410,7 +410,7 @@ define i1 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT: testq %rax, %rax
 ; AVX-NEXT: setne %al
 ; AVX-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a0)
   %2 = icmp ne i16 %1, 0
   ret i1 %2
 }
@@ -436,7 +436,7 @@ define i1 @test_v8i16(<8 x i16> %a0) {
 ; AVX-NEXT: vptest %xmm0, %xmm0
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a0)
   %2 = icmp eq i16 %1, 0
   ret i1 %2
 }
@@ -465,7 +465,7 @@ define i1 @test_v16i16(<16 x i16> %a0) {
 ; AVX-NEXT: setne %al
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a0)
   %2 = icmp ne i16 %1, 0
   ret i1 %2
 }
@@ -516,7 +516,7 @@ define i1 @test_v32i16(<32 x i16> %a0) {
 ; AVX512-NEXT: sete %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> %a0)
   %2 = icmp eq i16 %1, 0
   ret i1 %2
 }
@@ -580,7 +580,7 @@ define i1 @test_v64i16(<64 x i16> %a0) {
 ; AVX512-NEXT: setne %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> %a0)
   %2 = icmp ne i16 %1, 0
   ret i1 %2
 }
@@ -603,7 +603,7 @@ define i1 @test_v2i8(<2 x i8> %a0) {
 ; AVX-NEXT: testw %ax, %ax
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -622,7 +622,7 @@ define i1 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT: testl %eax, %eax
 ; AVX-NEXT: setne %al
 ; AVX-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a0)
   %2 = icmp ne i8 %1, 0
   ret i1 %2
 }
@@ -641,7 +641,7 @@ define i1 @test_v8i8(<8 x i8> %a0) {
 ; AVX-NEXT: testq %rax, %rax
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -667,7 +667,7 @@ define i1 @test_v16i8(<16 x i8> %a0) {
 ; AVX-NEXT: vptest %xmm0, %xmm0
 ; AVX-NEXT: setne %al
 ; AVX-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a0)
   %2 = icmp ne i8 %1, 0
   ret i1 %2
 }
@@ -696,7 +696,7 @@ define i1 @test_v32i8(<32 x i8> %a0) {
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -747,7 +747,7 @@ define i1 @test_v64i8(<64 x i8> %a0) {
 ; AVX512-NEXT: setne %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> %a0)
   %2 = icmp ne i8 %1, 0
   ret i1 %2
 }
@@ -811,7 +811,7 @@ define i1 @test_v128i8(<128 x i8> %a0) {
 ; AVX512-NEXT: sete %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -841,7 +841,7 @@ define i1 @trunc_v2i64(<2 x i64> %a0) {
 ; AVX-NEXT: vptest {{.*}}(%rip), %xmm0
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a0)
   %2 = trunc i64 %1 to i16
   %3 = icmp eq i16 %2, 0
   ret i1 %3
@@ -888,7 +888,7 @@ define i1 @mask_v8i32(<8 x i32> %a0) {
 ; AVX512-NEXT: sete %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
   %2 = and i32 %1, 2147483648
   %3 = icmp eq i32 %2, 0
   ret i1 %3
@@ -935,7 +935,7 @@ define i1 @trunc_v16i16(<16 x i16> %a0) {
 ; AVX512-NEXT: setne %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a0)
   %2 = trunc i16 %1 to i8
   %3 = icmp ne i8 %2, 0
   ret i1 %3
@@ -1003,7 +1003,7 @@ define i1 @mask_v128i8(<128 x i8> %a0) {
 ; AVX512-NEXT: sete %al
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %a0)
   %2 = and i8 %1, 1
   %3 = icmp eq i8 %2, 0
   ret i1 %3
@@ -1037,34 +1037,34 @@ define zeroext i1 @PR44781(%struct.Box* %0) {
 ; AVX-NEXT: retq
   %2 = bitcast %struct.Box* %0 to <4 x i32>*
   %3 = load <4 x i32>, <4 x i32>* %2, align 4
-  %4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %3)
+  %4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %3)
   %5 = and i32 %4, 15
   %6 = icmp eq i32 %5, 0
   ret i1 %6
 }
 
-declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or.ll b/llvm/test/CodeGen/X86/vector-reduce-or.ll
index 35193e9feadcd..63c93f3755826 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or.ll
@@ -24,7 +24,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vmovq %xmm0, %rax
 ; AVX-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -66,7 +66,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -114,7 +114,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -171,7 +171,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -193,7 +193,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -215,7 +215,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -265,7 +265,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -321,7 +321,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -386,7 +386,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -411,7 +411,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -436,7 +436,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -465,7 +465,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -528,7 +528,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -597,7 +597,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -675,7 +675,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -700,7 +700,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: # kill: def $al killed $al killed $eax
 ; AVX-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -726,7 +726,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: # kill: def $al killed $al killed $eax
 ; AVX-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -756,7 +756,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: # kill: def $al killed $al killed $eax
 ; AVX-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -790,7 +790,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: # kill: def $al killed $al killed $eax
 ; AVX-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -862,7 +862,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -940,7 +940,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -1027,32 +1027,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax.ll b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
index 89354e6b2d9fe..d4c5492508525 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
@@ -83,7 +83,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -209,7 +209,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -404,7 +404,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -731,7 +731,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -771,7 +771,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -819,7 +819,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -891,7 +891,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -981,7 +981,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -1104,7 +1104,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -1137,7 +1137,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -1172,7 +1172,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -1216,7 +1216,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -1280,7 +1280,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -1354,7 +1354,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -1445,7 +1445,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -1491,7 +1491,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -1547,7 +1547,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -1615,7 +1615,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -1685,7 +1685,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX512-NEXT: xorb $127, %al
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -1781,7 +1781,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -1895,7 +1895,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -2042,32 +2042,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
index 106888b99c28c..9543db9e950bc 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
@@ -83,7 +83,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -209,7 +209,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -404,7 +404,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -731,7 +731,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -771,7 +771,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@
-819,7 +819,7 @@ define i32 @test_v4i32(<4 x i32> %a0) { ; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a0) ret i32 %1 } @@ -891,7 +891,7 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %a0) ret i32 %1 } @@ -981,7 +981,7 @@ define i32 @test_v16i32(<16 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %a0) ret i32 %1 } @@ -1104,7 +1104,7 @@ define i32 @test_v32i32(<32 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> %a0) ret i32 %1 } @@ -1137,7 +1137,7 @@ define i16 @test_v2i16(<2 x i16> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> %a0) ret i16 %1 } @@ -1172,7 +1172,7 @@ define i16 @test_v4i16(<4 x i16> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a0) ret i16 %1 } @@ -1216,7 +1216,7 @@ define i16 @test_v8i16(<8 x i16> %a0) { ; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a0) ret i16 %1 } @@ -1280,7 +1280,7 @@ define i16 @test_v16i16(<16 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %a0) ret i16 %1 } @@ -1354,7 +1354,7 @@ define i16 @test_v32i16(<32 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> %a0) ret i16 %1 } @@ -1445,7 +1445,7 @@ define i16 @test_v64i16(<64 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> %a0) ret i16 %1 } @@ -1491,7 +1491,7 @@ define i8 @test_v2i8(<2 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> %a0) ret i8 %1 } @@ -1547,7 +1547,7 @@ define i8 @test_v4i8(<4 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al 
killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> %a0) ret i8 %1 } @@ -1615,7 +1615,7 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a0) ret i8 %1 } @@ -1685,7 +1685,7 @@ define i8 @test_v16i8(<16 x i8> %a0) { ; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a0) ret i8 %1 } @@ -1781,7 +1781,7 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %a0) ret i8 %1 } @@ -1895,7 +1895,7 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> %a0) ret i8 %1 } @@ -2042,32 +2042,32 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> %a0) ret i8 %1 } -declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.smin.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.smin.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>) +declare i16 
@llvm.vector.reduce.smin.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll index aedde4d11e060..27bf159b0c8c2 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll @@ -89,7 +89,7 @@ define i64 @test_v2i64(<2 x i64> %a0) { ; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a0) ret i64 %1 } @@ -231,7 +231,7 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %a0) ret i64 %1 } @@ -453,7 +453,7 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %a0) ret i64 %1 } @@ -832,7 +832,7 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> %a0) ret i64 %1 } @@ -875,7 +875,7 @@ define i32 @test_v2i32(<2 x i32> %a0) { ; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a0) ret i32 %1 } @@ -929,7 +929,7 @@ define i32 @test_v4i32(<4 x i32> %a0) { ; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a0) ret i32 %1 } @@ -1010,7 +1010,7 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %a0) ret i32 %1 } @@ -1115,7 +1115,7 @@ define i32 @test_v16i32(<16 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %a0) ret i32 %1 } @@ -1265,7 +1265,7 @@ 
define i32 @test_v32i32(<32 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> %a0) ret i32 %1 } @@ -1311,7 +1311,7 @@ define i16 @test_v2i16(<2 x i16> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> %a0) ret i16 %1 } @@ -1361,7 +1361,7 @@ define i16 @test_v4i16(<4 x i16> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a0) ret i16 %1 } @@ -1422,7 +1422,7 @@ define i16 @test_v8i16(<8 x i16> %a0) { ; AVX512VL-NEXT: notl %eax ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a0) ret i16 %1 } @@ -1505,7 +1505,7 @@ define i16 @test_v16i16(<16 x i16> %a0) { ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %a0) ret i16 %1 } @@ -1602,7 +1602,7 @@ define i16 @test_v32i16(<32 x i16> %a0) { ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> %a0) ret i16 %1 } @@ -1721,7 +1721,7 @@ define i16 @test_v64i16(<64 x i16> %a0) { ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> %a0) ret i16 %1 } @@ -1754,7 +1754,7 @@ define i8 @test_v2i8(<2 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> %a0) ret i8 %1 } @@ -1790,7 +1790,7 @@ define i8 @test_v4i8(<4 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> %a0) ret i8 %1 } @@ -1832,7 +1832,7 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a0) ret i8 %1 } @@ -1901,7 +1901,7 @@ define i8 @test_v16i8(<16 x i8> %a0) { ; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a0) ret i8 %1 } @@ -1994,7 +1994,7 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; AVX512VL-NEXT: # kill: def $al killed $al killed 
$eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %a0) ret i8 %1 } @@ -2099,7 +2099,7 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> %a0) ret i8 %1 } @@ -2222,32 +2222,32 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> %a0) ret i8 %1 } -declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.umax.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.umax.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.umax.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.umax.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.umax.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>) diff 
--git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll index c8195d2412944..dee8970b96a5b 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll @@ -89,7 +89,7 @@ define i64 @test_v2i64(<2 x i64> %a0) { ; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a0) ret i64 %1 } @@ -232,7 +232,7 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %a0) ret i64 %1 } @@ -456,7 +456,7 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %a0) ret i64 %1 } @@ -836,7 +836,7 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> %a0) ret i64 %1 } @@ -879,7 +879,7 @@ define i32 @test_v2i32(<2 x i32> %a0) { ; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a0) ret i32 %1 } @@ -933,7 +933,7 @@ define i32 @test_v4i32(<4 x i32> %a0) { ; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a0) ret i32 %1 } @@ -1014,7 +1014,7 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %a0) ret i32 %1 } @@ -1119,7 +1119,7 @@ define i32 @test_v16i32(<16 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %a0) ret i32 %1 } @@ -1269,7 +1269,7 @@ define i32 @test_v32i32(<32 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> %a0) ret i32 %1 } @@ -1315,7 +1315,7 @@ define i16 @test_v2i16(<2 x i16> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> %a0) ret i16 %1 } @@ -1365,7 +1365,7 @@ define i16 @test_v4i16(<4 x i16> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x 
i16> %a0) ret i16 %1 } @@ -1407,7 +1407,7 @@ define i16 @test_v8i16(<8 x i16> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a0) ret i16 %1 } @@ -1467,7 +1467,7 @@ define i16 @test_v16i16(<16 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %a0) ret i16 %1 } @@ -1539,7 +1539,7 @@ define i16 @test_v32i16(<32 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> %a0) ret i16 %1 } @@ -1632,7 +1632,7 @@ define i16 @test_v64i16(<64 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> %a0) ret i16 %1 } @@ -1665,7 +1665,7 @@ define i8 @test_v2i8(<2 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> %a0) ret i8 %1 } @@ -1701,7 +1701,7 @@ define i8 @test_v4i8(<4 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> %a0) ret i8 %1 } @@ -1743,7 +1743,7 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a0) ret i8 %1 } @@ -1791,7 +1791,7 @@ define i8 @test_v16i8(<16 x i8> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a0) ret i8 %1 } @@ -1859,7 +1859,7 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %a0) ret i8 %1 } @@ -1937,7 +1937,7 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> %a0) ret i8 %1 } @@ -2032,32 +2032,32 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> %a0) ret i8 %1 } -declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>) -declare i64 
@llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll index 5b0109bcd4eb2..efb45ee36f9fd 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll @@ -57,7 +57,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = trunc <2 x i64> %0 to <2 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a) ret i1 %b } @@ -107,7 +107,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = trunc <4 x i32> %0 to <4 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a) ret i1 %b } @@ -172,7 +172,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; AVX512VL-NEXT: setnp %al ; 
AVX512VL-NEXT: retq %a = trunc <8 x i8> %0 to <8 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) ret i1 %b } @@ -201,7 +201,7 @@ define i1 @trunc_v16i8_v16i1(<16 x i8>) { ; AVX512-NEXT: setnp %al ; AVX512-NEXT: retq %a = trunc <16 x i8> %0 to <16 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) ret i1 %b } @@ -256,7 +256,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <4 x i64> %0 to <4 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a) ret i1 %b } @@ -345,7 +345,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <8 x i32> %0 to <8 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) ret i1 %b } @@ -423,7 +423,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <16 x i16> %0 to <16 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) ret i1 %b } @@ -504,7 +504,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <32 x i8> %0 to <32 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a) ret i1 %b } @@ -609,7 +609,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <8 x i64> %0 to <8 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) ret i1 %b } @@ -717,7 +717,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <16 x i32> %0 to <16 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) ret i1 %b } @@ -815,7 +815,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <32 x i16> %0 to <32 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a) ret i1 %b } @@ -912,7 +912,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = trunc <64 x i8> %0 to <64 x i1> - %b = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a) ret i1 %b } @@ -978,7 +978,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <2 x i64> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a) ret i1 %b } @@ -1029,7 +1029,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <4 x i32> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a) ret i1 %b } @@ -1082,7 +1082,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <8 x i8> %0, zeroinitializer - %b = 
call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) ret i1 %b } @@ -1136,7 +1136,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) { ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <16 x i8> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) ret i1 %b } @@ -1219,7 +1219,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <4 x i64> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a) ret i1 %b } @@ -1288,7 +1288,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <8 x i32> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) ret i1 %b } @@ -1366,7 +1366,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <16 x i16> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) ret i1 %b } @@ -1454,7 +1454,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <32 x i8> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a) ret i1 %b } @@ -1557,7 +1557,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <8 x i64> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) ret i1 %b } @@ -1643,7 +1643,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <16 x i32> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) ret i1 %b } @@ -1743,7 +1743,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <32 x i16> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a) ret i1 %b } @@ -1853,13 +1853,13 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) { ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq %a = icmp eq <64 x i8> %0, zeroinitializer - %b = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> %a) + %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a) ret i1 %b } -declare i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1>) +declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>) +declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>) +declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>) +declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>) +declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>) +declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>) diff --git 
a/llvm/test/CodeGen/X86/vector-reduce-xor.ll b/llvm/test/CodeGen/X86/vector-reduce-xor.ll index 4fd84897445c4..b1306c7dabbed 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-xor.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor.ll @@ -24,7 +24,7 @@ define i64 @test_v2i64(<2 x i64> %a0) { ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovq %xmm0, %rax ; AVX-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a0) ret i64 %1 } @@ -66,7 +66,7 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %a0) ret i64 %1 } @@ -114,7 +114,7 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %a0) ret i64 %1 } @@ -171,7 +171,7 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> %a0) + %1 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> %a0) ret i64 %1 } @@ -193,7 +193,7 @@ define i32 @test_v2i32(<2 x i32> %a0) { ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a0) ret i32 %1 } @@ -215,7 +215,7 @@ define i32 @test_v4i32(<4 x i32> %a0) { ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a0) ret i32 %1 } @@ -265,7 +265,7 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %a0) ret i32 %1 } @@ -321,7 +321,7 @@ define i32 @test_v16i32(<16 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %a0) ret i32 %1 } @@ -386,7 +386,7 @@ define i32 @test_v32i32(<32 x i32> %a0) { ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> %a0) + %1 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> %a0) ret i32 %1 } @@ -411,7 +411,7 @@ define i16 @test_v2i16(<2 x i16> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> %a0) ret i16 %1 } @@ -436,7 +436,7 @@ define i16 @test_v4i16(<4 x i16> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a0) ret i16 %1 } @@ -465,7 +465,7 @@ define i16 @test_v8i16(<8 x i16> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; 
AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a0) ret i16 %1 } @@ -528,7 +528,7 @@ define i16 @test_v16i16(<16 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %a0) ret i16 %1 } @@ -597,7 +597,7 @@ define i16 @test_v32i16(<32 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> %a0) ret i16 %1 } @@ -675,7 +675,7 @@ define i16 @test_v64i16(<64 x i16> %a0) { ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> %a0) + %1 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> %a0) ret i16 %1 } @@ -700,7 +700,7 @@ define i8 @test_v2i8(<2 x i8> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> %a0) ret i8 %1 } @@ -726,7 +726,7 @@ define i8 @test_v4i8(<4 x i8> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %a0) ret i8 %1 } @@ -756,7 +756,7 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a0) ret i8 %1 } @@ -790,7 +790,7 @@ define i8 @test_v16i8(<16 x i8> %a0) { ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a0) ret i8 %1 } @@ -862,7 +862,7 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %a0) ret i8 %1 } @@ -940,7 +940,7 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> %a0) ret i8 %1 } @@ -1027,32 +1027,32 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %1 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> %a0) + %1 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> %a0) ret i8 %1 } -declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64>) +declare i64 
@llvm.vector.reduce.xor.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.xor.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.xor.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.xor.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.xor.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.xor.v128i8(<128 x i8>) diff --git a/llvm/test/Instrumentation/MemorySanitizer/experimental-reduce.ll b/llvm/test/Instrumentation/MemorySanitizer/reduce.ll similarity index 66% rename from llvm/test/Instrumentation/MemorySanitizer/experimental-reduce.ll rename to llvm/test/Instrumentation/MemorySanitizer/reduce.ll index e81333b515e25..2629756644e59 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/experimental-reduce.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/reduce.ll @@ -5,9 +5,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -declare i32 @llvm.experimental.vector.reduce.add(<3 x i32>) -declare i32 @llvm.experimental.vector.reduce.and(<3 x i32>) -declare i32 @llvm.experimental.vector.reduce.or(<3 x i32>) +declare i32 @llvm.vector.reduce.add(<3 x i32>) +declare i32 @llvm.vector.reduce.and(<3 x i32>) +declare i32 @llvm.vector.reduce.or(<3 x i32>) ; CHECK-LABEL: @reduce_add define i32 @reduce_add() sanitize_memory { @@ -17,9 +17,9 @@ define i32 @reduce_add() sanitize_memory { %o = load <3 x i32>, <3 x 
i32> *%p ; CHECK: [[O_SHADOW:%.*]] = load <3 x i32>, <3 x i32>* ; CHECK: [[O_ORIGIN:%.*]] = load i32, i32* -; CHECK: [[R_SHADOW:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v3i32(<3 x i32> [[O_SHADOW]]) -; CHECK: [[R:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v3i32(<3 x i32> [[O]]) - %r = call i32 @llvm.experimental.vector.reduce.add(<3 x i32> %o) +; CHECK: [[R_SHADOW:%.*]] = call i32 @llvm.vector.reduce.or.v3i32(<3 x i32> [[O_SHADOW]]) +; CHECK: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[O]]) + %r = call i32 @llvm.vector.reduce.add(<3 x i32> %o) ; CHECK: store i32 [[R_SHADOW]], {{.*}} @__msan_retval_tls ; CHECK: store i32 [[O_ORIGIN]], {{.*}} @__msan_retval_origin_tls ; CHECK: ret i32 [[R]] @@ -35,11 +35,11 @@ define i32 @reduce_and() sanitize_memory { ; CHECK: [[O_SHADOW:%.*]] = load <3 x i32>, <3 x i32>* ; CHECK: [[O_ORIGIN:%.*]] = load i32, i32* ; CHECK: [[O_SHADOW_1:%.*]] = or <3 x i32> [[O]], [[O_SHADOW]] -; CHECK: [[O_SHADOW_2:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> [[O_SHADOW_1]] -; CHECK: [[O_SHADOW_3:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v3i32(<3 x i32> [[O_SHADOW]]) +; CHECK: [[O_SHADOW_2:%.*]] = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> [[O_SHADOW_1]] +; CHECK: [[O_SHADOW_3:%.*]] = call i32 @llvm.vector.reduce.or.v3i32(<3 x i32> [[O_SHADOW]]) ; CHECK: [[R_SHADOW:%.*]] = and i32 [[O_SHADOW_2]], [[O_SHADOW_3]] -; CHECK: [[R:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> [[O]]) - %r = call i32 @llvm.experimental.vector.reduce.and(<3 x i32> %o) +; CHECK: [[R:%.*]] = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> [[O]]) + %r = call i32 @llvm.vector.reduce.and(<3 x i32> %o) ; CHECK: store i32 [[R_SHADOW]], {{.*}} @__msan_retval_tls ; CHECK: store i32 [[O_ORIGIN]], {{.*}} @__msan_retval_origin_tls ; CHECK: ret i32 [[R]] @@ -56,11 +56,11 @@ define i32 @reduce_or() sanitize_memory { ; CHECK: [[O_ORIGIN:%.*]] = load i32, i32* ; CHECK: [[NOT_O:%.*]] = xor <3 x i32> [[O]], ; CHECK: [[O_SHADOW_1:%.*]] = or <3 x i32> [[NOT_O]], [[O_SHADOW]] -; CHECK: [[O_SHADOW_2:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> [[O_SHADOW_1]] -; CHECK: [[O_SHADOW_3:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v3i32(<3 x i32> [[O_SHADOW]]) +; CHECK: [[O_SHADOW_2:%.*]] = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> [[O_SHADOW_1]] +; CHECK: [[O_SHADOW_3:%.*]] = call i32 @llvm.vector.reduce.or.v3i32(<3 x i32> [[O_SHADOW]]) ; CHECK: [[R_SHADOW:%.*]] = and i32 [[O_SHADOW_2]], [[O_SHADOW_3]] -; CHECK: [[R:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v3i32(<3 x i32> [[O]]) - %r = call i32 @llvm.experimental.vector.reduce.or(<3 x i32> %o) +; CHECK: [[R:%.*]] = call i32 @llvm.vector.reduce.or.v3i32(<3 x i32> [[O]]) + %r = call i32 @llvm.vector.reduce.or(<3 x i32> %o) ; CHECK: store i32 [[R_SHADOW]], {{.*}} @__msan_retval_tls ; CHECK: store i32 [[O_ORIGIN]], {{.*}} @__msan_retval_origin_tls ; CHECK: ret i32 [[R]] diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll index f11307468457c..02551c630c8ac 100644 --- a/llvm/test/Transforms/InstCombine/vector-reductions.ll +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -1,23 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s -declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) -declare float 
@llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>) +declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>) declare void @use_f32(float) -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) declare void @use_i32(i32) define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { ; CHECK-LABEL: @diff_of_sums_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz <4 x float> [[V0:%.*]], [[V1:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = call reassoc nsz float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float [[TMP2]], [[A1:%.*]] ; CHECK-NEXT: ret float [[R]] ; - %r0 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) - %r1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r0 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1) %r = fsub reassoc nsz float %r0, %r1 ret float %r } @@ -26,13 +26,13 @@ define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x flo define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { ; CHECK-LABEL: @diff_of_sums_v4f32_fmf( -; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) -; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) ; CHECK-NEXT: [[R:%.*]] = fsub nnan ninf nsz float [[R0]], [[R1]] ; CHECK-NEXT: ret float [[R]] ; - %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) - %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1) %r = fsub ninf nnan nsz float %r0, %r1 ret float %r } @@ -41,15 +41,15 @@ define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { ; CHECK-LABEL: @diff_of_sums_extra_use1( -; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) ; CHECK-NEXT: call void @use_f32(float [[R0]]) -; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x 
float> [[V1:%.*]]) ; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] ; CHECK-NEXT: ret float [[R]] ; - %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0) call void @use_f32(float %r0) - %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1) %r = fsub fast float %r0, %r1 ret float %r } @@ -58,14 +58,14 @@ define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 define float @diff_of_sums_extra_use2(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { ; CHECK-LABEL: @diff_of_sums_extra_use2( -; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) -; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) ; CHECK-NEXT: call void @use_f32(float [[R1]]) ; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] ; CHECK-NEXT: ret float [[R]] ; - %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) - %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1) call void @use_f32(float %r1) %r = fsub fast float %r0, %r1 ret float %r @@ -75,13 +75,13 @@ define float @diff_of_sums_extra_use2(float %a0, <4 x float> %v0, float %a1, <4 define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) { ; CHECK-LABEL: @diff_of_sums_type_mismatch( -; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) -; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]]) ; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] ; CHECK-NEXT: ret float [[R]] ; - %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) - %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %a1, <8 x float> %v1) + %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a1, <8 x float> %v1) %r = fsub fast float %r0, %r1 ret float %r } @@ -89,11 +89,11 @@ define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @diff_of_sums_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: 
[[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; - %r0 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v0) - %r1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v1) + %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0) + %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1) %r = sub i32 %r0, %r1 ret i32 %r } @@ -102,15 +102,15 @@ define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) { define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @diff_of_sums_v4i32_extra_use1( -; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]]) +; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]]) ; CHECK-NEXT: call void @use_i32(i32 [[R0]]) -; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]]) ; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]] ; CHECK-NEXT: ret i32 [[R]] ; - %r0 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v0) + %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0) call void @use_i32(i32 %r0) - %r1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v1) + %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1) %r = sub i32 %r0, %r1 ret i32 %r } @@ -119,14 +119,14 @@ define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) { define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @diff_of_sums_v4i32_extra_use2( -; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]]) -; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]]) +; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]]) ; CHECK-NEXT: call void @use_i32(i32 [[R1]]) ; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]] ; CHECK-NEXT: ret i32 [[R]] ; - %r0 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v0) - %r1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v1) + %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0) + %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1) call void @use_i32(i32 %r1) %r = sub i32 %r0, %r1 ret i32 %r @@ -136,13 +136,13 @@ define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) { define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @diff_of_sums_type_mismatch2( -; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]]) -; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]]) +; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]]) ; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]] ; CHECK-NEXT: ret i32 [[R]] ; - %r0 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %v0) - %r1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v1) + %r0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v0) + %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1) %r = sub i32 %r0, %r1 ret i32 %r } diff --git 
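The InstCombine hunks above all exercise the same difference-of-sums fold, now spelled with the renamed intrinsics. As a minimal sketch (not part of the patch; the function and value names below are invented for illustration), the fold turns two integer add reductions feeding a subtract into a single reduction of an element-wise difference:

  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

  define i32 @diff_of_sums_sketch(<4 x i32> %v0, <4 x i32> %v1) {
    ; %r0 = reduce.add(%v0), %r1 = reduce.add(%v1), followed by
    ; 'sub %r0, %r1' become one reduction of the element-wise difference.
    %d = sub <4 x i32> %v0, %v1
    %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %d)
    ret i32 %r
  }

The extra-use and type-mismatch tests above check the negative cases: the fold is skipped when an intermediate reduction has another user or when the two vector types differ.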
a/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll index 5f1861e24b622..a2bc86c9ac665 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll @@ -1,31 +1,31 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instsimplify -S | FileCheck %s -declare i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.mul.v1i32(<1 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.or.v1i32(<1 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.xor.v1i32(<1 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.smin.v1i32(<1 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.smax.v1i32(<1 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.umin.v1i32(<1 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a) -declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %a) +declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a) +declare i32 @llvm.vector.reduce.mul.v1i32(<1 x i32> %a) +declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a) +declare i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a) +declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a) +declare i32 @llvm.vector.reduce.or.v1i32(<1 x i32> %a) +declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a) +declare i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> %a) +declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %a) +declare i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> %a) +declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %a) +declare i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> %a) +declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %a) +declare i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> %a) +declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %a) +declare i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a) +declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %a) define i32 @add_0() { ; CHECK-LABEL: @add_0( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> zeroinitializer) + %x = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> zeroinitializer) ret i32 %x } @@ -33,7 +33,7 @@ define i32 @add_1() { ; CHECK-LABEL: @add_1( ; CHECK-NEXT: ret i32 8 ; - %x = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> ) ret i32 %x } @@ -41,7 +41,7 @@ define i32 @add_inc() { ; CHECK-LABEL: @add_inc( ; CHECK-NEXT: ret i32 18 ; - %x = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> ) ret i32 %x } @@ -49,25 +49,25 @@ define i32 @add_1v() { ; CHECK-LABEL: @add_1v( ; CHECK-NEXT: ret i32 10 ; - %x = call i32 
@llvm.experimental.vector.reduce.add.v1i32(<1 x i32> ) + %x = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> ) ret i32 %x } define i32 @add_undef() { ; CHECK-LABEL: @add_undef( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) + %x = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) ret i32 %x } define i32 @add_undef1() { ; CHECK-LABEL: @add_undef1( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> ) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> ) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> ) ret i32 %x } @@ -77,7 +77,7 @@ define i32 @mul_0() { ; CHECK-LABEL: @mul_0( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> zeroinitializer) + %x = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> zeroinitializer) ret i32 %x } @@ -85,7 +85,7 @@ define i32 @mul_1() { ; CHECK-LABEL: @mul_1( ; CHECK-NEXT: ret i32 1 ; - %x = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> ) ret i32 %x } @@ -93,7 +93,7 @@ define i32 @mul_inc() { ; CHECK-LABEL: @mul_inc( ; CHECK-NEXT: ret i32 40320 ; - %x = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> ) ret i32 %x } @@ -101,25 +101,25 @@ define i32 @mul_1v() { ; CHECK-LABEL: @mul_1v( ; CHECK-NEXT: ret i32 10 ; - %x = call i32 @llvm.experimental.vector.reduce.mul.v1i32(<1 x i32> ) + %x = call i32 @llvm.vector.reduce.mul.v1i32(<1 x i32> ) ret i32 %x } define i32 @mul_undef() { ; CHECK-LABEL: @mul_undef( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) + %x = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef) ret i32 %x } define i32 @mul_undef1() { ; CHECK-LABEL: @mul_undef1( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> ) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> ) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> ) ret i32 %x } @@ -128,7 +128,7 @@ define i32 @and_0() { ; CHECK-LABEL: @and_0( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> zeroinitializer) + %x = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> zeroinitializer) ret i32 %x } @@ -136,7 +136,7 @@ define i32 @and_1() { ; CHECK-LABEL: @and_1( ; CHECK-NEXT: ret i32 1 ; - %x = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> ) ret i32 %x } @@ -144,7 +144,7 @@ define i32 @and_inc() { ; CHECK-LABEL: @and_inc( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> ) ret i32 %x } @@ -152,25 +152,25 @@ define i32 @and_1v() { ; CHECK-LABEL: @and_1v( ; CHECK-NEXT: ret i32 10 ; - %x = 
call i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> ) + %x = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> ) ret i32 %x } define i32 @and_undef() { ; CHECK-LABEL: @and_undef( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) + %x = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) ret i32 %x } define i32 @and_undef1() { ; CHECK-LABEL: @and_undef1( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> ) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> ) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> ) ret i32 %x } @@ -179,7 +179,7 @@ define i32 @or_0() { ; CHECK-LABEL: @or_0( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> zeroinitializer) + %x = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> zeroinitializer) ret i32 %x } @@ -187,7 +187,7 @@ define i32 @or_1() { ; CHECK-LABEL: @or_1( ; CHECK-NEXT: ret i32 1 ; - %x = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> ) ret i32 %x } @@ -195,7 +195,7 @@ define i32 @or_inc() { ; CHECK-LABEL: @or_inc( ; CHECK-NEXT: ret i32 -1 ; - %x = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> ) ret i32 %x } @@ -203,25 +203,25 @@ define i32 @or_1v() { ; CHECK-LABEL: @or_1v( ; CHECK-NEXT: ret i32 10 ; - %x = call i32 @llvm.experimental.vector.reduce.or.v1i32(<1 x i32> ) + %x = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> ) ret i32 %x } define i32 @or_undef() { ; CHECK-LABEL: @or_undef( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) + %x = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) ret i32 %x } define i32 @or_undef1() { ; CHECK-LABEL: @or_undef1( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> ) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> ) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> ) ret i32 %x } @@ -230,7 +230,7 @@ define i32 @xor_0() { ; CHECK-LABEL: @xor_0( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> zeroinitializer) + %x = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> zeroinitializer) ret i32 %x } @@ -238,7 +238,7 @@ define i32 @xor_1() { ; CHECK-LABEL: @xor_1( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> ) ret i32 %x } @@ -246,7 +246,7 @@ define i32 @xor_inc() { ; CHECK-LABEL: @xor_inc( ; CHECK-NEXT: ret i32 10 ; - %x = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> ) ret i32 %x } @@ -254,25 +254,25 @@ define i32 @xor_1v() { ; CHECK-LABEL: @xor_1v( ; CHECK-NEXT: ret i32 10 ; - %x = call i32 
@llvm.experimental.vector.reduce.xor.v1i32(<1 x i32> ) + %x = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> ) ret i32 %x } define i32 @xor_undef() { ; CHECK-LABEL: @xor_undef( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) + %x = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) ret i32 %x } define i32 @xor_undef1() { ; CHECK-LABEL: @xor_undef1( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> ) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> ) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> ) ret i32 %x } @@ -281,7 +281,7 @@ define i32 @smin_0() { ; CHECK-LABEL: @smin_0( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> zeroinitializer) + %x = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> zeroinitializer) ret i32 %x } @@ -289,7 +289,7 @@ define i32 @smin_1() { ; CHECK-LABEL: @smin_1( ; CHECK-NEXT: ret i32 1 ; - %x = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> ) ret i32 %x } @@ -297,7 +297,7 @@ define i32 @smin_inc() { ; CHECK-LABEL: @smin_inc( ; CHECK-NEXT: ret i32 -6 ; - %x = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> ) ret i32 %x } @@ -305,25 +305,25 @@ define i32 @smin_1v() { ; CHECK-LABEL: @smin_1v( ; CHECK-NEXT: ret i32 10 ; - %x = call i32 @llvm.experimental.vector.reduce.smin.v1i32(<1 x i32> ) + %x = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> ) ret i32 %x } define i32 @smin_undef() { ; CHECK-LABEL: @smin_undef( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) + %x = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) ret i32 %x } define i32 @smin_undef1() { ; CHECK-LABEL: @smin_undef1( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> ) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> ) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> ) ret i32 %x } @@ -332,7 +332,7 @@ define i32 @smax_0() { ; CHECK-LABEL: @smax_0( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> zeroinitializer) + %x = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> zeroinitializer) ret i32 %x } @@ -340,7 +340,7 @@ define i32 @smax_1() { ; CHECK-LABEL: @smax_1( ; CHECK-NEXT: ret i32 1 ; - %x = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> ) ret i32 %x } @@ -348,7 +348,7 @@ define i32 @smax_inc() { ; CHECK-LABEL: @smax_inc( ; CHECK-NEXT: ret i32 8 ; - %x = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> ) ret i32 %x } @@ -356,25 +356,25 @@ define i32 @smax_1v() { ; CHECK-LABEL: 
@smax_1v( ; CHECK-NEXT: ret i32 10 ; - %x = call i32 @llvm.experimental.vector.reduce.smax.v1i32(<1 x i32> ) + %x = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> ) ret i32 %x } define i32 @smax_undef() { ; CHECK-LABEL: @smax_undef( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) + %x = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) ret i32 %x } define i32 @smax_undef1() { ; CHECK-LABEL: @smax_undef1( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> ) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> ) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> ) ret i32 %x } @@ -383,7 +383,7 @@ define i32 @umin_0() { ; CHECK-LABEL: @umin_0( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> zeroinitializer) + %x = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> zeroinitializer) ret i32 %x } @@ -391,7 +391,7 @@ define i32 @umin_1() { ; CHECK-LABEL: @umin_1( ; CHECK-NEXT: ret i32 1 ; - %x = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> ) ret i32 %x } @@ -399,7 +399,7 @@ define i32 @umin_inc() { ; CHECK-LABEL: @umin_inc( ; CHECK-NEXT: ret i32 1 ; - %x = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> ) ret i32 %x } @@ -407,25 +407,25 @@ define i32 @umin_1v() { ; CHECK-LABEL: @umin_1v( ; CHECK-NEXT: ret i32 10 ; - %x = call i32 @llvm.experimental.vector.reduce.umin.v1i32(<1 x i32> ) + %x = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> ) ret i32 %x } define i32 @umin_undef() { ; CHECK-LABEL: @umin_undef( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) + %x = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) ret i32 %x } define i32 @umin_undef1() { ; CHECK-LABEL: @umin_undef1( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> ) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> ) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> ) ret i32 %x } @@ -434,7 +434,7 @@ define i32 @umax_0() { ; CHECK-LABEL: @umax_0( ; CHECK-NEXT: ret i32 0 ; - %x = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> zeroinitializer) + %x = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> zeroinitializer) ret i32 %x } @@ -442,7 +442,7 @@ define i32 @umax_1() { ; CHECK-LABEL: @umax_1( ; CHECK-NEXT: ret i32 1 ; - %x = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> ) ret i32 %x } @@ -450,7 +450,7 @@ define i32 @umax_inc() { ; CHECK-LABEL: @umax_inc( ; CHECK-NEXT: ret i32 -3 ; - %x = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> ) ret i32 
%x } @@ -458,24 +458,24 @@ define i32 @umax_1v() { ; CHECK-LABEL: @umax_1v( ; CHECK-NEXT: ret i32 10 ; - %x = call i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> ) + %x = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> ) ret i32 %x } define i32 @umax_undef() { ; CHECK-LABEL: @umax_undef( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) + %x = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) ret i32 %x } define i32 @umax_undef1d() { ; CHECK-LABEL: @umax_undef1d( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> ) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> ) ; CHECK-NEXT: ret i32 [[X]] ; - %x = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> ) + %x = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> ) ret i32 %x } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll index eaed8276a9c66..0f2dfaeb55f6f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll @@ -8,7 +8,7 @@ target triple = "aarch64--linux-gnu" ; Function Attrs: norecurse nounwind readonly define i32 @fn1() local_unnamed_addr #0 { ; Ensure that we don't emit reduction intrinsics for unsupported short reductions. -; CHECK-NOT: @llvm.experimental.vector.reduce +; CHECK-NOT: @llvm.vector.reduce entry: %0 = load i32, i32* @b, align 4, !tbaa !1 %cmp40 = icmp sgt i32 %0, 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll index 50449d76b827a..a95c0aa6f375f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll @@ -20,7 +20,7 @@ target triple = "aarch64--linux-gnu" ; CHECK: add <16 x i8> ; ; CHECK: middle.block: -; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> +; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> ; CHECK: zext i8 [[Rdx]] to i32 ; define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) { @@ -75,7 +75,7 @@ for.body: ; CHECK: add <8 x i16> ; ; CHECK: middle.block: -; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> +; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> ; CHECK: zext i16 [[Rdx]] to i32 ; define i16 @reduction_i16_1(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) { @@ -132,7 +132,7 @@ for.body: ; CHECK: add <8 x i16> ; ; CHECK: middle.block: -; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> +; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> ; CHECK: zext i16 [[Rdx]] to i32 ; define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) { diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index d9254f8d9323f..6142b9b0b47f7 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ 
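The reduction-small-size checks above rely on the vectorizer performing the reduction in the narrow element type and widening only the final scalar. A minimal sketch of that middle-block shape under the new naming (names invented for illustration):

  declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)

  define i32 @narrow_rdx_sketch(<16 x i8> %acc) {
    ; reduce at i8 width first, then zero-extend the single scalar result
    %rdx = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %acc)
    %w = zext i8 %rdx to i32
    ret i32 %w
  }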
b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -628,7 +628,7 @@ end: ; preds = %end, %entry ret void } -declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll index b7dc26eb4556d..2cd97c1b416d3 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -23,7 +23,7 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] @@ -77,7 +77,7 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] @@ -132,7 +132,7 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP6]]) ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] @@ -186,7 +186,7 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP6]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -240,7 +240,7 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: 
[[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -294,7 +294,7 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP6]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -348,7 +348,7 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP6]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -402,7 +402,7 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP16:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP6]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -451,7 +451,7 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -508,7 +508,7 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP20:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff 
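The floating-point cases above use the renamed two-operand form, where the old '.v2.fadd.f32.v4f32' suffix collapses to '.fadd.v4f32'. The first operand is the scalar start value (0.0 for fadd, 1.0 for fmul), and 'fast' permits an unordered, reassociated reduction. A hedged sketch with illustrative names only:

  declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)

  define float @fadd_rdx_sketch(<4 x float> %v) {
    ; scalar start value first, vector second; 'fast' allows reassociation
    %r = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> %v)
    ret float %r
  }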
--git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll index 57b63079ba889..787fe55664223 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -41,7 +41,7 @@ define i32 @mla_i32(i8* noalias nocapture readonly %A, i8* noalias nocapture rea ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -129,7 +129,7 @@ define i32 @mla_i8(i8* noalias nocapture readonly %A, i8* noalias nocapture read ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP11]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -210,7 +210,7 @@ define i32 @add_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -281,7 +281,7 @@ define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -352,7 +352,7 @@ define i32 @and_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 
x i32> [[TMP5]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -423,7 +423,7 @@ define i32 @or_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -494,7 +494,7 @@ define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -565,7 +565,7 @@ define float @fadd_f32(float* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP16:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -636,7 +636,7 @@ define float @fmul_f32(float* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP5]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -703,7 +703,7 @@ define i32 @smin_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP20:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 
@llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -773,7 +773,7 @@ define i32 @smax_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP22:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -843,7 +843,7 @@ define i32 @umin_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP24:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -913,7 +913,7 @@ define i32 @umax_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll index 614d055730d88..267406af706c2 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -183,7 +183,7 @@ define i32 @add_i32_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] @@ -232,7 +232,7 @@ define i32 @add_i16_i32(i16* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> 
[[TMP4]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] @@ -282,7 +282,7 @@ define i32 @add_i8_i32(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] @@ -330,7 +330,7 @@ define signext i16 @add_i16_i16(i16* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP3]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] @@ -379,7 +379,7 @@ define signext i16 @add_i8_i16(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP4]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] @@ -427,7 +427,7 @@ define zeroext i8 @add_i8_i8(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[TMP3]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] @@ -664,7 +664,7 @@ define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y, ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] @@ -720,7 +720,7 @@ define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y, ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 
[[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] @@ -778,7 +778,7 @@ define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP9:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] @@ -834,7 +834,7 @@ define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture reado ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP6]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] @@ -890,7 +890,7 @@ define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP11:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP8]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] @@ -946,7 +946,7 @@ define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly % ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[TMP6]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll index cdcb81ec2dc28..075abffb42837 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll @@ -29,8 +29,8 @@ define arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32* 
nocaptur
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]])
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[N]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
index 95b22eb9660ad..18b130224f987 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
@@ -595,7 +595,7 @@ define i32 @i32_smin_reduction(i32* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -665,7 +665,7 @@ define i32 @i32_smax_reduction(i32* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -735,7 +735,7 @@ define i32 @i32_umin_reduction(i32* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP16:!llvm.loop !.*]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -805,7 +805,7 @@ define i32 @i32_umax_reduction(i32* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
index fa6f080a652c5..9aa5d586a30e9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
@@ -110,7 +110,7 @@ define double @sumIfVector(double* nocapture readonly %arr) {
 ; AVX-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32
 ; AVX-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
 ; AVX: middle.block:
-; AVX-NEXT: [[TMP8:%.*]] = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.000000e+00, <4 x double> [[PREDPHI]])
+; AVX-NEXT: [[TMP8:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[PREDPHI]])
 ; AVX-NEXT: [[CMP_N:%.*]] = icmp eq i32 32, 32
 ; AVX-NEXT: br i1 [[CMP_N]], label [[DONE:%.*]], label [[SCALAR_PH]]
 ; AVX: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll
index 1ce437198cb05..be1b7c2d7ae8b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll
@@ -59,7 +59,7 @@ define i32 @inv_val_store_to_inv_address_with_reduction(i32* %a, i64 %n, i32* %b
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <16 x i32> [[TMP12]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX12:%.*]] = add <16 x i32> [[TMP13]], [[BIN_RDX11]]
-; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX12]])
+; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX12]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
index 900db950ee31a..f4a6f04252f7a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
@@ -97,7 +97,7 @@ define i32 @test_explicit_pred(i64 %len) {
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP37]], [[TMP36]]
 ; CHECK-NEXT: [[BIN_RDX19:%.*]] = add <4 x i32> [[TMP38]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX20:%.*]] = add <4 x i32> [[TMP39]], [[BIN_RDX19]]
-; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX20]])
+; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX20]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -263,7 +263,7 @@ define i32 @test_explicit_pred_generic(i64 %len, i1* %test_base) {
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
 ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
-; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
+; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -450,7 +450,7 @@ define i32 @test_invariant_address(i64 %len, i1* %test_base) {
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP101]], [[TMP100]]
 ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP102]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP103]], [[BIN_RDX7]]
-; CHECK-NEXT: [[TMP105:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]])
+; CHECK-NEXT: [[TMP105:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -776,7 +776,7 @@ define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP181]], [[TMP180]]
 ; CHECK-NEXT: [[BIN_RDX37:%.*]] = add <4 x i32> [[TMP182]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX38:%.*]] = add <4 x i32> [[TMP183]], [[BIN_RDX37]]
-; CHECK-NEXT: [[TMP185:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]])
+; CHECK-NEXT: [[TMP185:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -953,7 +953,7 @@ define i32 @test_max_trip_count(i64 %len, i1* %test_base, i64 %n) {
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP82]], [[TMP81]]
 ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP84]], [[BIN_RDX10]]
-; CHECK-NEXT: [[TMP86:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
+; CHECK-NEXT: [[TMP86:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -1126,7 +1126,7 @@ define i32 @test_non_zero_start(i64 %len, i1* %test_base) {
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
 ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
-; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
+; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 3072, 3072
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -1477,7 +1477,7 @@ define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP149]], [[TMP148]]
 ; CHECK-NEXT: [[BIN_RDX37:%.*]] = add <4 x i32> [[TMP150]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX38:%.*]] = add <4 x i32> [[TMP151]], [[BIN_RDX37]]
-; CHECK-NEXT: [[TMP153:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]])
+; CHECK-NEXT: [[TMP153:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 2048, 2048
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -1644,7 +1644,7 @@ define i32 @neg_off_by_many(i64 %len, i1* %test_base) {
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
 ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
-; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
+; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -1811,7 +1811,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, i1* %test_base) {
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
 ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
-; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
+; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -1978,7 +1978,7 @@ define i32 @neg_off_by_one_byte(i64 %len, i1* %test_base) {
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
 ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
 ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
-; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
+; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
index e71c40419f9f1..a833b982e9952 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
@@ -90,7 +90,7 @@ define i32 @main() local_unnamed_addr #0 {
 ; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]]
-; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
+; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP7]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll b/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll
index a8dc5350c08a3..f9a469cd093e2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll
@@ -27,7 +27,7 @@ define zeroext i8 @sum() {
 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <64 x i8> [[TMP5]], [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> [[BIN_RDX]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> [[BIN_RDX]])
 ; CHECK-NEXT: ret i8 [[TMP7]]
 ;
 entry:
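Every hunk in this patch is the same mechanical rename: the `llvm.experimental.vector.reduce.*` intrinsics become `llvm.vector.reduce.*`, with unchanged operands. A minimal standalone sketch of the new integer spelling, outside any test (the function name @sum_v4i32 is hypothetical, not part of this patch; the intrinsic signature is the one exercised above):

  ; Horizontal add: returns the sum of the four i32 lanes of %v.
  define i32 @sum_v4i32(<4 x i32> %v) {
    %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
    ret i32 %r
  }
  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)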
diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
index cb0d0503dcb1c..f2498b97ca2be 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
@@ -76,7 +76,7 @@ define float @reduction_sum_float_fastmath(i32 %n, float* %array) {
 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[BIN_RDX]])
+; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[BIN_RDX]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 4096, 4096
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -148,7 +148,7 @@ define float @reduction_sum_float_only_reassoc(i32 %n, float* %array) {
 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <4 x float> [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[TMP11:%.*]] = call reassoc float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[BIN_RDX]])
+; CHECK-NEXT: [[TMP11:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[BIN_RDX]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 4096, 4096
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -220,7 +220,7 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, float* %arra
 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc contract <4 x float> [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[TMP11:%.*]] = call reassoc contract float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[BIN_RDX]])
+; CHECK-NEXT: [[TMP11:%.*]] = call reassoc contract float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[BIN_RDX]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 4096, 4096
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
index 0055a66db59db..99acd8e078807 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
@@ -62,7 +62,7 @@ define i32 @matrix_row_col([100 x i32]* nocapture readonly %data, i32 %i, i32 %j
 ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
 ; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP37]])
+; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP37]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 96
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
index 044128c8ee74d..1017d79792f52 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
@@ -195,7 +195,7 @@ define i32 @reduction_i32(i32* nocapture readonly %A, i32* nocapture readonly %B
 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP15]])
+; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP15]])
 ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
index a4f9ac13664a0..fe5cc16314a27 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
@@ -27,13 +27,13 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 12
 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD4]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD4]])
 ; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI1]]
-; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD5]])
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD5]])
 ; CHECK-NEXT: [[TMP13]] = add i32 [[TMP12]], [[VEC_PHI2]]
-; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD6]])
+; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD6]])
 ; CHECK-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI3]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index 3994d64f91d3c..adcf8f5f0b938 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -15,7 +15,7 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP3]] = add i32 [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -65,11 +65,11 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND2]])
 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT: [[TMP9]] = add i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
@@ -121,7 +121,7 @@ define i32 @reduction_sum_const(i32* noalias nocapture %A) {
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT: [[TMP4]] = add i32 [[TMP3]], 12
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
@@ -173,11 +173,11 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND2]])
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT: [[TMP9]] = mul i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
@@ -234,9 +234,9 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND2]])
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND2]])
 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT: [[TMP8]] = add i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
@@ -291,9 +291,9 @@ define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT: [[TMP7]] = mul i32 [[TMP6]], [[TMP5]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -346,7 +346,7 @@ define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* no
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT: [[TMP6]] = add i32 [[TMP5]], [[VEC_PHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -398,9 +398,9 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT: [[TMP7]] = and i32 [[TMP6]], [[TMP5]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -453,7 +453,7 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT: [[TMP6]] = or i32 [[TMP5]], [[VEC_PHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -506,7 +506,7 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT: [[TMP6]] = xor i32 [[TMP5]], [[VEC_PHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -558,9 +558,9 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[WIDE_LOAD1]])
 ; CHECK-NEXT: [[TMP7]] = fadd float [[TMP6]], [[TMP5]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -612,9 +612,9 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP5:%.*]] = fmul float [[TMP4]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD1]])
 ; CHECK-NEXT: [[TMP7]] = fmul float [[TMP6]], [[TMP5]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -663,7 +663,7 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP2]], i32 [[VEC_PHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
@@ -711,7 +711,7 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt i32 [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP2]], i32 [[VEC_PHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
@@ -765,7 +765,7 @@ define i32 @reduction_sub_lhs(i32* noalias nocapture %A) {
 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !30
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -827,7 +827,7 @@ define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !32
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]])
+; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]])
 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -950,11 +950,11 @@ define i32 @reduction_predicated(i32* noalias nocapture %A, i32* noalias nocaptu
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND2]])
 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT: [[TMP9]] = add i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
@@ -1012,7 +1012,7 @@ define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !36
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP3]])
 ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
 ; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
@@ -1059,7 +1059,7 @@ define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT: [[TMP6]] = and i32 [[TMP5]], [[TMP0]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
index b8a4d16627fe4..1d450e4c52596 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
@@ -10,7 +10,7 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK: [[TMP24:%.*]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP23:%.*]], <4 x i32> zeroinitializer
 ; CHECK: [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]]
 ; CHECK: middle.block:
-; CHECK: [[TMP27:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
+; CHECK: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
 ;
 entry:
   br label %.lr.ph
@@ -38,7 +38,7 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK: [[TMP46:%.*]] = add <4 x i32> [[TMP45]], [[TMP43:%.*]]
 ; CHECK: [[TMP47]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
 ; CHECK: middle.block:
-; CHECK: [[TMP49:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP47]])
+; CHECK: [[TMP49:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP47]])
 ;
 entry:
   br label %.lr.ph
@@ -70,7 +70,7 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK: [[TMP45:%.*]] = mul <4 x i32> [[TMP44]], [[TMP43:%.*]]
 ; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
 ; CHECK: middle.block:
-; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]])
+; CHECK: [[TMP48:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]])
 ;
 entry:
   br label %.lr.ph
@@ -101,7 +101,7 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK: [[TMP45:%.*]] = and <4 x i32> [[TMP44]], [[TMP43:%.*]]
 ; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
 ; CHECK: middle.block:
-; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP46]])
+; CHECK: [[TMP48:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP46]])
 ;
 entry:
   br label %for.body
@@ -131,7 +131,7 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK: [[TMP45:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP44:%.*]], <4 x i32> zeroinitializer
 ; CHECK: [[TMP46]] = or <4 x i32> [[VEC_PHI]], [[TMP45]]
 ; CHECK: middle.block:
-; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP46]])
+; CHECK: [[TMP48:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP46]])
 ;
 entry:
   br label %for.body
@@ -161,7 +161,7 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK: [[TMP45:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP44:%.*]], <4 x i32> zeroinitializer
 ; CHECK: [[TMP46]] = xor <4 x i32> [[VEC_PHI]], [[TMP45]]
 ; CHECK: middle.block:
-; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP46]])
+; CHECK: [[TMP48:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP46]])
 ;
 entry:
   br label %for.body
@@ -192,7 +192,7 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK: [[TMP45:%.*]] = fadd fast <4 x float> [[TMP44]], [[TMP43:%.*]]
 ; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]]
 ; CHECK: middle.block:
-; CHECK: [[TMP48:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP46]])
+; CHECK: [[TMP48:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP46]])
 ;
 entry:
   br label %for.body
@@ -223,7 +223,7 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK: [[TMP45:%.*]] = fmul fast <4 x float> [[TMP44]], [[TMP43:%.*]]
 ; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]]
 ; CHECK: middle.block:
-; CHECK: [[TMP48:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP46]])
+; CHECK: [[TMP48:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP46]])
 ;
 entry:
   br label %for.body
@@ -254,7 +254,7 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK: [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]
 ; CHECK: [[TMP26]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]]
 ; CHECK: middle.block:
-; CHECK: [[TMP28:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP26]])
+; CHECK: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP26]])
 ;
 entry:
   br label %for.body
@@ -283,7 +283,7 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK: [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]
 ; CHECK: [[TMP26]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]]
 ; CHECK: middle.block:
-; CHECK: [[TMP28:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP26]])
+; CHECK: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP26]])
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index cf01ead15b0e5..5e7a8dbb72704 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -8,7 +8,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @ext_ext_or_reduction_v4i32(
 ; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[Z]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[Z]])
 ; CHECK-NEXT: ret i32 [[TMP1]]
 ;
 %z = and <4 x i32> %x, %y
@@ -74,7 +74,7 @@ define i32 @TestVectorsEqual(i32* noalias %Vec0, i32* noalias %Vec1, i32 %Tolera
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], zeroinitializer
 ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP4]]
 ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
 ; CHECK-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TOLERANCE:%.*]]
 ; CHECK-NEXT: [[COND6:%.*]] = zext i1 [[CMP5]] to i32
 ; CHECK-NEXT: ret i32 [[COND6]]
@@ -133,7 +133,7 @@ define i32 @TestVectorsEqual_alt(i32* noalias %Vec0, i32* noalias %Vec1, i32 %To
 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[VEC1:%.*]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP5]], [[TOLERANCE:%.*]]
 ; CHECK-NEXT: [[COND:%.*]] = zext i1 [[CMP3]] to i32
 ; CHECK-NEXT: ret i32 [[COND]]
@@ -181,7 +181,7 @@ define i32 @TestVectorsEqualFP(float* noalias %Vec0, float* noalias %Vec1, float
 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <4 x float> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP4]])
-; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
 ; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ole float [[TMP6]], [[TOLERANCE:%.*]]
 ; CHECK-NEXT: [[COND5:%.*]] = zext i1 [[CMP4]] to i32
 ; CHECK-NEXT: ret i32 [[COND5]]
@@ -240,7 +240,7 @@ define i32 @TestVectorsEqualFP_alt(float* noalias %Vec0, float* noalias %Vec1, f
 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[VEC1:%.*]] to <4 x float>*
 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <4 x float> [[TMP1]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP4]])
 ; CHECK-NEXT: [[CMP3:%.*]] = fcmp fast ole float [[TMP5]], [[TOLERANCE:%.*]]
 ; CHECK-NEXT: [[COND:%.*]] = zext i1 [[CMP3]] to i32
 ; CHECK-NEXT: ret i32 [[COND]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
index 2bc5469a7d882..4c2d51b188e62 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -23,7 +23,7 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]],
 ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[TMP4]]
 ; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
 ; CHECK-NEXT: ret i32 [[TMP10]]
 ;
 %tmp00 = lshr i32 %a, 15
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
index 5f8cbd5c8d010..e46f924852e90 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -17,7 +17,7 @@ define void @PR28330(i32 %n) {
 ; DEFAULT: for.body:
 ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32>
-; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
+; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
 ; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
 ; DEFAULT-NEXT: br label [[FOR_BODY]]
 ;
@@ -61,7 +61,7 @@ define void @PR28330(i32 %n) {
 ; GATHER-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP25]], i32 6
 ; GATHER-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7
 ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
-; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
+; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
 ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], [[P17]]
 ; GATHER-NEXT: br label [[FOR_BODY]]
 ;
@@ -153,7 +153,7 @@ define void @PR32038(i32 %n) {
 ; DEFAULT: for.body:
 ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32>
-; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
+; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
 ; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5
 ; DEFAULT-NEXT: br label [[FOR_BODY]]
 ;
@@ -197,7 +197,7 @@ define void @PR32038(i32 %n) {
 ; GATHER-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP25]], i32 6
 ; GATHER-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7
 ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
-; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
+; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
 ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], -5
 ; GATHER-NEXT: br label [[FOR_BODY]]
 ;
@@ -229,7 +229,7 @@ define void @PR32038(i32 %n) {
 ; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> , <4 x i32>
 ; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
 ; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
+; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
 ; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]]
 ; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]]
 ; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
index ab07e8c248a0d..1f801834add08 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -46,7 +46,7 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], zeroinitializer
 ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP4]]
 ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
 ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]]
 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_023]], i64 [[IDX_EXT]]
 ; CHECK-NEXT: [[ADD_PTR29]] = getelementptr inbounds i32, i32* [[P2_024]], i64 [[IDX_EXT]]
@@ -169,7 +169,7 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]]
 ; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
 ; CHECK-NEXT: br i1 [[CMP14]], label [[IF_END]], label [[FOR_END_LOOPEXIT:%.*]]
@@ -285,7 +285,7 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP6]], zeroinitializer
 ; CHECK-NEXT: [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP6]]
 ; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> [[TMP6]]
-; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
 ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]]
 ; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
 ; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
index 6d2c2f410ade1..ec634a144fcb9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
@@ -244,7 +244,7 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-NEXT: [[TMP12:%.*]] = mul nuw <4 x i32> [[TMP11]],
 ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP9]]
 ; CHECK-NEXT: [[TMP14:%.*]] = xor <4 x i32> [[TMP13]], [[TMP12]]
-; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP14]])
+; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP14]])
 ; CHECK-NEXT: ret i32 [[TMP15]]
 ;
 %v0.0 = extractelement <4 x i32> %v0, i32 0
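The floating-point variants additionally drop the `v2.` infix and the redundant scalar-type component: `@llvm.experimental.vector.reduce.v2.fadd.f32.v4f32` becomes `@llvm.vector.reduce.fadd.v4f32`, with the start value still passed as the first operand. A minimal sketch in the new spelling (the function name @sum_v4f32 is hypothetical, not part of this patch):

  ; Folds the four float lanes of %v into %acc. The `fast` flag (as in
  ; these tests) lets the otherwise strictly ordered fadd reduction be
  ; reassociated.
  define float @sum_v4f32(float %acc, <4 x float> %v) {
    %r = call fast float @llvm.vector.reduce.fadd.v4f32(float %acc, <4 x float> %v)
    ret float %r
  }
  declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)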
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
index fc763b0df2464..d420454608126 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
@@ -19,7 +19,7 @@ define void @mainTest(i32* %ptr) #0 {
 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
 ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
 ; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
-; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP10]], 1
 ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]]
 ; CHECK-NEXT: [[OP_EXTRA2:%.*]] = add i32 [[OP_EXTRA1]], [[TMP6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
index 3a6901b51b4f8..d4a0710406d31 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
@@ -20,7 +20,7 @@ define void @test() #0 {
 ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
 ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> , [[TMP5]]
 ; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]],
-; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP9]])
 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT: [[OP_EXTRA1]] = add i64 [[OP_EXTRA]], [[TMP6]]
 ; CHECK-NEXT: br label [[LOOP]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
index 5c692574b292f..a38aa8ed4a887 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -11,7 +11,7 @@ define void @Test(i32) {
 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]],
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0:%.*]]
 ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
 ; CHECK-NEXT: [[OP_EXTRA2:%.*]] = and i32 [[OP_EXTRA1]], [[TMP0]]
@@ -62,7 +62,7 @@ define void @Test(i32) {
 ; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]],
 ; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
 ; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
-; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP3]])
+; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP3]])
 ; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
 ; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
 ; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
index a1c239c92c8e0..ba499e29eef82 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
@@ -13,7 +13,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15
 ; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered, align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]],
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP4]])
 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
 ; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16
 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
index b0971dd804501..af12041c07ecb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
@@ -84,7 +84,7 @@ define float @merge_anyof_v4f32_wrong_first(<4 x float> %x) {
 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
 ; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[TMP1]], 4.200000e+01
 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[X]],
-; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
 ; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[CMP3WRONG]]
 ; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP4]], float -1.000000e+00, float 1.000000e+00
 ; CHECK-NEXT: ret float [[R]]
@@ -111,7 +111,7 @@ define float @merge_anyof_v4f32_wrong_last(<4 x float> %x) {
 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
 ; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[TMP1]], 4.200000e+01
 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[X]],
-; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
 ; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[CMP3WRONG]]
 ; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP4]], float -1.000000e+00, float 1.000000e+00
 ; CHECK-NEXT: ret float [[R]]
@@ -138,7 +138,7 @@ define i32 @merge_anyof_v4i32_wrong_middle(<4 x i32> %x) {
 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
 ; CHECK-NEXT: [[CMP3WRONG:%.*]] = icmp slt i32 [[TMP1]], 42
 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]],
-; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
 ; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[CMP3WRONG]]
 ; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP4]], i32 -1, i32 1
 ; CHECK-NEXT: ret i32 [[R]]
@@ -169,7 +169,7 @@ define i32 @merge_anyof_v4i32_wrong_middle_better_rdx(<4 x i32> %x, <4 x i32> %y
 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
 ; CHECK-NEXT: [[CMP3WRONG:%.*]] = icmp slt i32 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[X]], [[Y]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
 ; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[CMP3WRONG]]
 ; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP5]], i32 -1, i32 1
 ; CHECK-NEXT: ret i32 [[R]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index dd5d649c41bb4..18d9fae436aee 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -102,7 +102,7 @@ define float @bazz() {
 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
 ; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
-; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
 ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
 ; CHECK-NEXT: store float [[OP_EXTRA1]], float* @res, align 4
@@ -118,7 +118,7 @@ define float @bazz() {
 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
 ; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
 ; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
-; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
+; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
 ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
 ; THRESHOLD-NEXT: store float [[OP_EXTRA1]], float* @res, align 4
@@ -175,7 +175,7 @@ define float @bazzz() {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
 ; CHECK-NEXT: store float [[TMP5]], float* @res, align 4
 ; CHECK-NEXT: ret float [[TMP5]]
@@ -187,7 +187,7 @@ define float @bazzz() {
 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
-; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
+; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
 ; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
 ; THRESHOLD-NEXT: store float [[TMP5]], float* @res, align 4
 ; THRESHOLD-NEXT: ret float [[TMP5]]
@@ -223,7 +223,7 @@ define i32 @foo() {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
 ; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
 ; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4
@@ -236,7 +236,7 @@ define i32 @foo() {
 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
-; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
+; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
 ; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
 ; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
 ; THRESHOLD-NEXT: store i32 [[CONV4]], i32* @n, align 4
@@ -390,8 +390,8 @@ define float @f(float* nocapture readonly %x) {
 ; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
 ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v32f32(float 0.000000e+00, <32 x float> [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
 ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
 ; CHECK-NEXT: ret float [[OP_RDX]]
 ;
@@ -448,8 +448,8 @@ define float @f(float* nocapture readonly %x) {
 ; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
 ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
-; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v32f32(float 0.000000e+00, <32 x float> [[TMP3]])
-; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
+; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP3]])
+; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
 ; THRESHOLD-NEXT: ret float [[OP_RDX]]
 ;
@@ -637,7 +637,7 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
 ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v32f32(float 0.000000e+00, <32 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP1]])
 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
 ; CHECK-NEXT: ret float [[OP_EXTRA]]
 ;
@@ -678,7 +678,7 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
 ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
-; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v32f32(float 0.000000e+00, <32 x float> [[TMP1]])
+; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP1]])
 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
 ; THRESHOLD-NEXT: ret float [[OP_EXTRA]]
 ;
@@ -824,10 +824,10 @@ define float @loadadd31(float* nocapture readonly %x) {
 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
 ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.000000e+00, <16 x float> [[TMP7]])
-; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP5]])
+; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP5]])
 ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
+; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
 ; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
 ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX1]], [[TMP1]]
 ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
@@ -873,10 +873,10 @@ define float @loadadd31(float* nocapture readonly %x) {
 ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
 ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
 ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
-; THRESHOLD-NEXT: [[TMP8:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.000000e+00, <16 x float> [[TMP7]])
-; THRESHOLD-NEXT: [[TMP9:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP5]])
+; THRESHOLD-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP7]])
+; THRESHOLD-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP5]])
 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
-; THRESHOLD-NEXT: [[TMP10:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
+; THRESHOLD-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
 ; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
 ;
THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX1]], [[TMP1]] ; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] @@ -990,7 +990,7 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] ; CHECK-NEXT: ret float [[OP_EXTRA1]] @@ -1009,7 +1009,7 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) +; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA1]] @@ -1060,7 +1060,7 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 ; CHECK-NEXT: [[OP_EXTRA2:%.*]] = fadd fast float [[OP_EXTRA1]], 5.000000e+00 @@ -1081,7 +1081,7 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) +; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 ; THRESHOLD-NEXT: [[OP_EXTRA2:%.*]] = fadd fast float [[OP_EXTRA1]], 5.000000e+00 @@ -1138,7 +1138,7 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: 
[[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] ; CHECK-NEXT: ret float [[OP_EXTRA1]] @@ -1159,7 +1159,7 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) +; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]]) ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA1]] @@ -1212,7 +1212,7 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] ; CHECK-NEXT: ret i32 [[OP_EXTRA1]] @@ -1231,7 +1231,7 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 ; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer ; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; THRESHOLD-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) +; THRESHOLD-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] ; THRESHOLD-NEXT: ret i32 [[OP_EXTRA1]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 9663ede723cc6..58056a592c3b1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -12,7 +12,7 @@ define i32 @maxi8(i32) { ; CHECK-LABEL: @maxi8( ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]]) ; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 @@ -43,7 +43,7 @@ define i32 @maxi8(i32) { define i32 @maxi16(i32) { ; 
CHECK-LABEL: @maxi16( ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> [[TMP2]]) ; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 @@ -98,7 +98,7 @@ define i32 @maxi16(i32) { define i32 @maxi32(i32) { ; CHECK-LABEL: @maxi32( ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> [[TMP2]]) ; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 @@ -758,7 +758,7 @@ define i32 @maxi8_mutiple_uses(i32) { ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 ; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]]) +; AVX-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]]) ; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]] ; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]] ; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]] @@ -776,7 +776,7 @@ define i32 @maxi8_mutiple_uses(i32) { ; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 ; THRESH-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 ; THRESH-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; THRESH-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) +; THRESH-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) ; THRESH-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[TMP7]], i32 0 ; THRESH-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP3]], i32 1 ; THRESH-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0 @@ -860,7 +860,7 @@ define i32 @maxi8_wrong_parent(i32) { ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 ; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 ; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]]) +; AVX-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]]) ; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] ; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] ; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] @@ -879,7 +879,7 @@ define i32 @maxi8_wrong_parent(i32) { ; 
THRESH-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 ; THRESH-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 ; THRESH-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; THRESH-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]]) +; THRESH-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]]) ; THRESH-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] ; THRESH-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] ; THRESH-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index 703ebbf9dd732..e8e21d4ff7329 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -37,7 +37,7 @@ define i32 @add_red(float* %A, i32 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]]) ; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] @@ -70,7 +70,7 @@ define i32 @add_red(float* %A, i32 %n) { ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], -; STORE-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP3]]) +; STORE-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]]) ; STORE-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]] ; STORE-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] @@ -164,7 +164,7 @@ define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) ; CHECK-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_040]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] @@ -202,7 +202,7 @@ define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; STORE-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] -; STORE-NEXT: [[TMP6:%.*]] 
= call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) +; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) ; STORE-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]] ; STORE-NEXT: [[INC]] = add nsw i64 [[I_040]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] @@ -326,7 +326,7 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]] ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4 ; CHECK-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP6]]) ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]] ; CHECK-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 @@ -383,7 +383,7 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]] ; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4 ; STORE-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]] -; STORE-NEXT: [[TMP8:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP6]]) +; STORE-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP6]]) ; STORE-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]] ; STORE-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]] ; STORE-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 @@ -520,7 +520,7 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) ; CHECK-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] @@ -558,7 +558,7 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] -; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) +; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) ; STORE-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]] ; STORE-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] @@ -1015,7 +1015,7 @@ define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i ; STORE-NEXT: [[TMP3:%.*]] = bitcast 
float* [[ARRAYIDX2]] to <4 x float>* ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]] -; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) +; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) ; STORE-NEXT: store float [[TMP6]], float* [[C_ADDR_038]], align 4 ; STORE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1 ; STORE-NEXT: [[INC]] = add nsw i64 [[I_039]], 1 @@ -1090,7 +1090,7 @@ define void @float_red_example4(float* %res) { ; STORE-LABEL: @float_red_example4( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([32 x float]* @arr_float to <4 x float>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP0]]) +; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP0]]) ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1130,7 +1130,7 @@ define void @float_red_example8(float* %res) { ; STORE-LABEL: @float_red_example8( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr_float to <8 x float>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.000000e+00, <8 x float> [[TMP0]]) +; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]]) ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1194,7 +1194,7 @@ define void @float_red_example16(float* %res) { ; STORE-LABEL: @float_red_example16( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr_float to <16 x float>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.000000e+00, <16 x float> [[TMP0]]) +; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP0]]) ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1250,7 +1250,7 @@ define void @i32_red_example4(i32* %res) { ; STORE-LABEL: @i32_red_example4( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP0]]) +; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]]) ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1290,7 +1290,7 @@ define void @i32_red_example8(i32* %res) { ; STORE-LABEL: @i32_red_example8( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) +; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1354,7 +1354,7 @@ define void @i32_red_example16(i32* %res) { ; STORE-LABEL: 
@i32_red_example16( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> [[TMP0]]) +; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP0]]) ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1466,7 +1466,7 @@ define void @i32_red_example32(i32* %res) { ; STORE-LABEL: @i32_red_example32( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> [[TMP0]]) +; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> [[TMP0]]) ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1544,14 +1544,14 @@ define void @i32_red_call(i32 %val) { ; CHECK-LABEL: @i32_red_call( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) ; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) ; CHECK-NEXT: ret void ; ; STORE-LABEL: @i32_red_call( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) +; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) ; STORE-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) ; STORE-NEXT: ret void ; @@ -1579,7 +1579,7 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_ ; CHECK-LABEL: @i32_red_invoke( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) ; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) ; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] ; CHECK: exception: @@ -1592,7 +1592,7 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_ ; STORE-LABEL: @i32_red_invoke( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) +; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) ; STORE-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) ; STORE-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] ; STORE: exception: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll index 0879e358ad732..470a7d44c03f8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll @@ -5,7 +5,7 @@ define signext i8 @Foo(<32 x i8>* %__v) { ; CHECK-LABEL: @Foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load 
<32 x i8>, <32 x i8>* [[__V:%.*]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> [[TMP0]]) ; CHECK-NEXT: ret i8 [[TMP1]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll index 30ceee8f9e81c..bcca68fb38a1c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll @@ -80,7 +80,7 @@ define i32 @horiz_max_multiple_uses([32 x i32]* %x, i32* %p) { ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[T4:%.*]] = load i32, i32* [[X4]], align 4 ; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[X5]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[T4]] ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[T4]] ; CHECK-NEXT: [[C012345:%.*]] = icmp sgt i32 [[TMP5]], [[T5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll index d0bb09a9e24a9..12d2355561b70 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll @@ -35,7 +35,7 @@ define i32 @test(i32* nocapture readonly %p) { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: @@ -124,7 +124,7 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: @@ -230,7 +230,7 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[REORDER_SHUFFLE]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll index 0d5050412f283..6322551facc13 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll @@ -26,7 +26,7 @@ define i32 @test_add(i32* nocapture readonly %p) { ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -74,7 +74,7 @@ define i32 @test_mul(i32* nocapture readonly %p) { ; AVX-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; AVX-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]]) +; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]]) ; AVX-NEXT: ret i32 [[TMP2]] ; ; SSE-LABEL: @test_mul( @@ -148,7 +148,7 @@ define i32 @test_and(i32* nocapture readonly %p) { ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -196,7 +196,7 @@ define i32 @test_or(i32* nocapture readonly %p) { ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -244,7 +244,7 @@ define i32 @test_xor(i32* nocapture readonly %p) { ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -284,7 +284,7 @@ define i32 @PR37731(<4 x i32>* noalias nocapture dereferenceable(16) %self) unna ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP4]], ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[TMP5]] ; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[SELF]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP6]]) ; CHECK-NEXT: ret i32 [[TMP7]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll index 518ec13a36fdd..149303c4bdc46 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll @@ -38,7 +38,7 @@ 
define i32 @foo(i32* %diff) #0 { ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3 ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 16 -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index 9ed21a1c3f8ca..4cbc06ae60357 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -18,14 +18,14 @@ define void @hoge() { ; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef ; CHECK-NEXT: [[SHUFFLE5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE5]], -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef ; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63 ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], undef ; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef ; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i32 [[OP_EXTRA]], undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll b/llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll index 7898d41128902..e2659dd121fae 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll @@ -5,7 +5,7 @@ define float @dotf(<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @dotf( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = fmul fast <4 x float> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP0]]) ; CHECK-NEXT: ret float [[TMP1]] ; entry: @@ -33,7 +33,7 @@ define double @dotd(<4 x double>* byval nocapture readonly align 32, <4 x double ; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[TMP0:%.*]], align 32 ; CHECK-NEXT: [[Y:%.*]] = load <4 x double>, <4 x double>* [[TMP1:%.*]], align 32 ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x double> [[X]], [[Y]] -; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.000000e+00, <4 x double> [[TMP2]]) 
+; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP2]]) ; CHECK-NEXT: ret double [[TMP3]] ; entry: @@ -63,7 +63,7 @@ define float @dotfq(<4 x float>* nocapture readonly %x, <4 x float>* nocapture r ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[Y:%.*]], align 16 ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]]) ; CHECK-NEXT: ret float [[TMP3]] ; entry: @@ -93,7 +93,7 @@ define double @dotdq(<4 x double>* nocapture readonly %x, <4 x double>* nocaptur ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[X:%.*]], align 32 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[Y:%.*]], align 32 ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x double> [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.000000e+00, <4 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP2]]) ; CHECK-NEXT: ret double [[TMP3]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll index d3bf7f1dd4989..875fcbc52e6a8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll @@ -37,7 +37,7 @@ define i32 @foo(i32* nocapture readonly %diff) #0 { ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3 ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 16 -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll index 343f98957a5c1..3b5a53e44dec1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll @@ -16,7 +16,7 @@ define void @_Z2azv() local_unnamed_addr { ; CHECK-NEXT: [[DOTSROA_RAW_IDX_7:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], undef ; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 undef ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[OP_EXTRA]], undef diff --git 
index 5818bd29b4c68..a7a6e1005e5cb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
@@ -55,7 +55,7 @@ define void @n() local_unnamed_addr #0 {
 ; CHECK-NEXT: [[TMP38:%.*]] = icmp slt <4 x i32> [[TMP37]], zeroinitializer
 ; CHECK-NEXT: [[TMP39:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP37]]
 ; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP38]], <4 x i32> [[TMP39]], <4 x i32> [[TMP37]]
-; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP40]])
+; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP40]])
 ; CHECK-NEXT: [[TMP42:%.*]] = icmp slt i32 [[TMP41]], [[TMP32]]
 ; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP41]], i32 [[TMP32]]
 ; CHECK-NEXT: [[TMP44:%.*]] = icmp slt i32 [[TMP43]], [[B_0]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
index 02e7c5b37f3ee..31b73236f5e70 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
@@ -17,7 +17,7 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4
 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A5:%.*]], i32 6
 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A6:%.*]], i32 7
 ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
 ; CHECK-NEXT: ret i32 [[TMP11]]
 ;
 entry:
@@ -67,7 +67,7 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A3:%.*]], i32 7
 ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
 ; CHECK-NEXT: ret i32 [[TMP11]]
 ;
 entry:
@@ -121,7 +121,7 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A1:%.*]], i32 6
 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A3:%.*]], i32 7
 ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
 ; CHECK-NEXT: ret i32 [[TMP11]]
 ;
 entry:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
index 3c1319a5c625f..9e9237e82b536 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
@@ -291,14 +291,14 @@ class LLVM_OneResultIntrOp overloadedResults = [],
 // LLVM vector reduction over a single vector.
 class LLVM_VectorReduction
-    : LLVM_OneResultIntrOp<"experimental.vector.reduce." # mnem,
+    : LLVM_OneResultIntrOp<"vector.reduce." # mnem,
                            [], [0], [NoSideEffect]>,
       Arguments<(ins LLVM_Type)>;
 
 // LLVM vector reduction over a single vector, with an initial value,
 // and with permission to reassociate the reduction operations.
-class LLVM_VectorReductionV2
-    : LLVM_OpBase
+class LLVM_VectorReductionAcc
+    : LLVM_OpBase,
       Results<(outs LLVM_Type:$res)>,
       Arguments<(ins LLVM_Type, LLVM_Type,
@@ -307,7 +307,7 @@ class LLVM_VectorReductionV2
     llvm::Module *module = builder.GetInsertBlock()->getModule();
     llvm::Function *fn = llvm::Intrinsic::getDeclaration(
        module,
-       llvm::Intrinsic::experimental_vector_reduce_v2_}] # mnem # [{,
+       llvm::Intrinsic::vector_reduce_}] # mnem # [{,
        { }] # StrJoin.lst, ListIntSubst.lst)>.result # [{
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 94f6809349f19..b4554cff7e62f 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -885,20 +885,20 @@ def LLVM_MemcpyInlineOp : LLVM_ZeroResultIntrOp<"memcpy.inline", [0, 1, 2]>,
 // Vector Reductions.
 //
-def LLVM_experimental_vector_reduce_add : LLVM_VectorReduction<"add">;
-def LLVM_experimental_vector_reduce_and : LLVM_VectorReduction<"and">;
-def LLVM_experimental_vector_reduce_mul : LLVM_VectorReduction<"mul">;
-def LLVM_experimental_vector_reduce_fmax : LLVM_VectorReduction<"fmax">;
-def LLVM_experimental_vector_reduce_fmin : LLVM_VectorReduction<"fmin">;
-def LLVM_experimental_vector_reduce_or : LLVM_VectorReduction<"or">;
-def LLVM_experimental_vector_reduce_smax : LLVM_VectorReduction<"smax">;
-def LLVM_experimental_vector_reduce_smin : LLVM_VectorReduction<"smin">;
-def LLVM_experimental_vector_reduce_umax : LLVM_VectorReduction<"umax">;
-def LLVM_experimental_vector_reduce_umin : LLVM_VectorReduction<"umin">;
-def LLVM_experimental_vector_reduce_xor : LLVM_VectorReduction<"xor">;
-
-def LLVM_experimental_vector_reduce_v2_fadd : LLVM_VectorReductionV2<"fadd">;
-def LLVM_experimental_vector_reduce_v2_fmul : LLVM_VectorReductionV2<"fmul">;
+def LLVM_vector_reduce_add : LLVM_VectorReduction<"add">;
+def LLVM_vector_reduce_and : LLVM_VectorReduction<"and">;
+def LLVM_vector_reduce_mul : LLVM_VectorReduction<"mul">;
+def LLVM_vector_reduce_fmax : LLVM_VectorReduction<"fmax">;
+def LLVM_vector_reduce_fmin : LLVM_VectorReduction<"fmin">;
+def LLVM_vector_reduce_or : LLVM_VectorReduction<"or">;
+def LLVM_vector_reduce_smax : LLVM_VectorReduction<"smax">;
+def LLVM_vector_reduce_smin : LLVM_VectorReduction<"smin">;
+def LLVM_vector_reduce_umax : LLVM_VectorReduction<"umax">;
+def LLVM_vector_reduce_umin : LLVM_VectorReduction<"umin">;
+def LLVM_vector_reduce_xor : LLVM_VectorReduction<"xor">;
+
+def LLVM_vector_reduce_fadd : LLVM_VectorReductionAcc<"fadd">;
+def LLVM_vector_reduce_fmul : LLVM_VectorReductionAcc<"fmul">;
 //
 // LLVM Matrix operations.
diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td
index 137e130c45943..f8fc4dead86f3 100644
--- a/mlir/include/mlir/Dialect/Vector/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td
@@ -207,7 +207,7 @@ def Vector_ReductionOp :
     Note that these operations are restricted to 1-D vectors to remain
     close to the corresponding LLVM intrinsics:
 
-    http://llvm.org/docs/LangRef.html#experimental-vector-reduction-intrinsics
+    http://llvm.org/docs/LangRef.html#vector-reduction-intrinsics
 
     Example:
diff --git a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir
index eef0c9d51185f..dbbcecfa3b619 100644
--- a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir
+++ b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir
@@ -24,61 +24,61 @@ module {
     %12 = llvm.mlir.constant(3 : i64) : !llvm.i64
     %v = llvm.insertelement %3, %11[%12 : !llvm.i64] : !llvm.vec<4 x float>
-    %max = "llvm.intr.experimental.vector.reduce.fmax"(%v)
+    %max = "llvm.intr.vector.reduce.fmax"(%v)
         : (!llvm.vec<4 x float>) -> !llvm.float
     llvm.call @printF32(%max) : (!llvm.float) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 4
-    %min = "llvm.intr.experimental.vector.reduce.fmin"(%v)
+    %min = "llvm.intr.vector.reduce.fmin"(%v)
        : (!llvm.vec<4 x float>) -> !llvm.float
     llvm.call @printF32(%min) : (!llvm.float) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 1
-    %add1 = "llvm.intr.experimental.vector.reduce.v2.fadd"(%0, %v)
+    %add1 = "llvm.intr.vector.reduce.fadd"(%0, %v)
        : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float
     llvm.call @printF32(%add1) : (!llvm.float) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 11
-    %add1r = "llvm.intr.experimental.vector.reduce.v2.fadd"(%0, %v)
+    %add1r = "llvm.intr.vector.reduce.fadd"(%0, %v)
        {reassoc = true} : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float
     llvm.call @printF32(%add1r) : (!llvm.float) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 11
-    %add2 = "llvm.intr.experimental.vector.reduce.v2.fadd"(%1, %v)
+    %add2 = "llvm.intr.vector.reduce.fadd"(%1, %v)
        : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float
     llvm.call @printF32(%add2) : (!llvm.float) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 12
-    %add2r = "llvm.intr.experimental.vector.reduce.v2.fadd"(%1, %v)
+    %add2r = "llvm.intr.vector.reduce.fadd"(%1, %v)
        {reassoc = true} : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float
     llvm.call @printF32(%add2r) : (!llvm.float) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 12
-    %mul1 = "llvm.intr.experimental.vector.reduce.v2.fmul"(%0, %v)
+    %mul1 = "llvm.intr.vector.reduce.fmul"(%0, %v)
       : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float
     llvm.call @printF32(%mul1) : (!llvm.float) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 24
-    %mul1r = "llvm.intr.experimental.vector.reduce.v2.fmul"(%0, %v)
+    %mul1r = "llvm.intr.vector.reduce.fmul"(%0, %v)
       {reassoc = true} : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float
     llvm.call @printF32(%mul1r) : (!llvm.float) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 24
-    %mul2 = "llvm.intr.experimental.vector.reduce.v2.fmul"(%1, %v)
+    %mul2 = "llvm.intr.vector.reduce.fmul"(%1, %v)
       : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float
     llvm.call @printF32(%mul2) : (!llvm.float) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 48
-    %mul2r = "llvm.intr.experimental.vector.reduce.v2.fmul"(%1, %v)
+    %mul2r = "llvm.intr.vector.reduce.fmul"(%1, %v)
       {reassoc = true} : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float
     llvm.call @printF32(%mul2r) : (!llvm.float) -> ()
     llvm.call @printNewline() : () -> ()
diff --git a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir
index e4d578e36e23d..7d8c9736a3484 100644
--- a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir
+++ b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir
@@ -24,55 +24,55 @@ module {
     %12 = llvm.mlir.constant(3 : i64) : !llvm.i64
     %v = llvm.insertelement %3, %11[%12 : !llvm.i64] : !llvm.vec<4 x i64>
-    %add = "llvm.intr.experimental.vector.reduce.add"(%v)
+    %add = "llvm.intr.vector.reduce.add"(%v)
        : (!llvm.vec<4 x i64>) -> !llvm.i64
     llvm.call @printI64(%add) : (!llvm.i64) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 10
-    %and = "llvm.intr.experimental.vector.reduce.and"(%v)
+    %and = "llvm.intr.vector.reduce.and"(%v)
        : (!llvm.vec<4 x i64>) -> !llvm.i64
     llvm.call @printI64(%and) : (!llvm.i64) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 0
-    %mul = "llvm.intr.experimental.vector.reduce.mul"(%v)
+    %mul = "llvm.intr.vector.reduce.mul"(%v)
        : (!llvm.vec<4 x i64>) -> !llvm.i64
     llvm.call @printI64(%mul) : (!llvm.i64) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 24
-    %or = "llvm.intr.experimental.vector.reduce.or"(%v)
+    %or = "llvm.intr.vector.reduce.or"(%v)
        : (!llvm.vec<4 x i64>) -> !llvm.i64
     llvm.call @printI64(%or) : (!llvm.i64) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 7
-    %smax = "llvm.intr.experimental.vector.reduce.smax"(%v)
+    %smax = "llvm.intr.vector.reduce.smax"(%v)
        : (!llvm.vec<4 x i64>) -> !llvm.i64
     llvm.call @printI64(%smax) : (!llvm.i64) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 4
-    %smin = "llvm.intr.experimental.vector.reduce.smin"(%v)
+    %smin = "llvm.intr.vector.reduce.smin"(%v)
        : (!llvm.vec<4 x i64>) -> !llvm.i64
     llvm.call @printI64(%smin) : (!llvm.i64) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 1
-    %umax = "llvm.intr.experimental.vector.reduce.umax"(%v)
+    %umax = "llvm.intr.vector.reduce.umax"(%v)
        : (!llvm.vec<4 x i64>) -> !llvm.i64
     llvm.call @printI64(%umax) : (!llvm.i64) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 4
-    %umin = "llvm.intr.experimental.vector.reduce.umin"(%v)
+    %umin = "llvm.intr.vector.reduce.umin"(%v)
        : (!llvm.vec<4 x i64>) -> !llvm.i64
     llvm.call @printI64(%umin) : (!llvm.i64) -> ()
     llvm.call @printNewline() : () -> ()
     // CHECK: 1
-    %xor = "llvm.intr.experimental.vector.reduce.xor"(%v)
+    %xor = "llvm.intr.vector.reduce.xor"(%v)
       : (!llvm.vec<4 x i64>) -> !llvm.i64
     llvm.call @printI64(%xor) : (!llvm.i64) -> ()
     llvm.call @printNewline() : () -> ()
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index e6c0feb070f07..ac94a421903dc 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -564,33 +564,33 @@ class VectorReductionOpConversion : public ConvertToLLVMPattern {
     if (eltType.isIntOrIndex()) {
       // Integer reductions: add/mul/min/max/and/or/xor.
       if (kind == "add")
-        rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_add>(
+        rewriter.replaceOpWithNewOp<LLVM::vector_reduce_add>(
             op, llvmType, operands[0]);
       else if (kind == "mul")
-        rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_mul>(
+        rewriter.replaceOpWithNewOp<LLVM::vector_reduce_mul>(
            op, llvmType, operands[0]);
      else if (kind == "min" &&
               (eltType.isIndex() || eltType.isUnsignedInteger()))
-        rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_umin>(
+        rewriter.replaceOpWithNewOp<LLVM::vector_reduce_umin>(
            op, llvmType, operands[0]);
      else if (kind == "min")
-        rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_smin>(
+        rewriter.replaceOpWithNewOp<LLVM::vector_reduce_smin>(
            op, llvmType, operands[0]);
      else if (kind == "max" &&
               (eltType.isIndex() || eltType.isUnsignedInteger()))
-        rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_umax>(
+        rewriter.replaceOpWithNewOp<LLVM::vector_reduce_umax>(
            op, llvmType, operands[0]);
      else if (kind == "max")
-        rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_smax>(
+        rewriter.replaceOpWithNewOp<LLVM::vector_reduce_smax>(
            op, llvmType, operands[0]);
      else if (kind == "and")
-        rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_and>(
+        rewriter.replaceOpWithNewOp<LLVM::vector_reduce_and>(
            op, llvmType, operands[0]);
      else if (kind == "or")
-        rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_or>(
+        rewriter.replaceOpWithNewOp<LLVM::vector_reduce_or>(
            op, llvmType, operands[0]);
      else if (kind == "xor")
-        rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_xor>(
+        rewriter.replaceOpWithNewOp<LLVM::vector_reduce_xor>(
            op, llvmType, operands[0]);
      else
        return failure();
@@ -604,7 +604,7 @@ class VectorReductionOpConversion : public ConvertToLLVMPattern {
                 : rewriter.create<LLVM::ConstantOp>(
                       op->getLoc(), llvmType,
                       rewriter.getZeroAttr(eltType));
-      rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_v2_fadd>(
+      rewriter.replaceOpWithNewOp<LLVM::vector_reduce_fadd>(
           op, llvmType, acc, operands[0],
          rewriter.getBoolAttr(reassociateFPReductions));
     } else if (kind == "mul") {
@@ -614,14 +614,14 @@ class VectorReductionOpConversion : public ConvertToLLVMPattern {
                 : rewriter.create<LLVM::ConstantOp>(
                       op->getLoc(), llvmType,
                       rewriter.getFloatAttr(eltType, 1.0));
-      rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_v2_fmul>(
+      rewriter.replaceOpWithNewOp<LLVM::vector_reduce_fmul>(
          op, llvmType, acc, operands[0],
          rewriter.getBoolAttr(reassociateFPReductions));
    } else if (kind == "min")
-      rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_fmin>(
+      rewriter.replaceOpWithNewOp<LLVM::vector_reduce_fmin>(
          op, llvmType, operands[0]);
    else if (kind == "max")
-      rewriter.replaceOpWithNewOp<LLVM::experimental_vector_reduce_fmax>(
+      rewriter.replaceOpWithNewOp<LLVM::vector_reduce_fmax>(
          op, llvmType, operands[0]);
    else
      return failure();
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
index a2a47b0eff267..70b0a8fadfeed 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
@@ -5,14 +5,14 @@
 // CHECK-LABEL: llvm.func @reduce_add_f32(
 // CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x float>)
 // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float
-// CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fadd"(%[[C]], %[[A]])
+// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
 // CHECK-SAME: {reassoc = false} : (!llvm.float, !llvm.vec<16 x float>) -> !llvm.float
 // CHECK: llvm.return %[[V]] : !llvm.float
 //
 // REASSOC-LABEL: llvm.func @reduce_add_f32(
 // REASSOC-SAME: %[[A:.*]]: !llvm.vec<16 x float>)
 // REASSOC: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float
-// REASSOC: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fadd"(%[[C]], %[[A]])
+// REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
 // REASSOC-SAME: {reassoc = true} : (!llvm.float, !llvm.vec<16 x float>) -> !llvm.float
 // REASSOC: llvm.return %[[V]] : !llvm.float
 //
@@ -25,14 +25,14 @@ func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 {
 // CHECK-LABEL: llvm.func @reduce_mul_f32(
 // CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x float>)
 // CHECK: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float
-// CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fmul"(%[[C]], %[[A]])
+// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]])
 // CHECK-SAME: {reassoc = false} : (!llvm.float, !llvm.vec<16 x float>) -> !llvm.float
 // CHECK: llvm.return %[[V]] : !llvm.float
 //
 // REASSOC-LABEL: llvm.func @reduce_mul_f32(
 // REASSOC-SAME: %[[A:.*]]: !llvm.vec<16 x float>)
 // REASSOC: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float
-// REASSOC: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fmul"(%[[C]], %[[A]])
+// REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]])
 // REASSOC-SAME: {reassoc = true} : (!llvm.float, !llvm.vec<16 x float>) -> !llvm.float
 // REASSOC: llvm.return %[[V]] : !llvm.float
 //
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index eae918897d32c..ab4948a56791e 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -774,7 +774,7 @@ func @reduce_f16(%arg0: vector<16xf16>) -> f16 {
 // CHECK-LABEL: llvm.func @reduce_f16(
 // CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x half>)
 // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f16) : !llvm.half
-// CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fadd"(%[[C]], %[[A]])
+// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
 // CHECK-SAME: {reassoc = false} : (!llvm.half, !llvm.vec<16 x half>) -> !llvm.half
 // CHECK: llvm.return %[[V]] : !llvm.half
@@ -785,7 +785,7 @@ func @reduce_f32(%arg0: vector<16xf32>) -> f32 {
 // CHECK-LABEL: llvm.func @reduce_f32(
 // CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x float>)
 // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float
-// CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fadd"(%[[C]], %[[A]])
+// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
 // CHECK-SAME: {reassoc = false} : (!llvm.float, !llvm.vec<16 x float>) -> !llvm.float
 // CHECK: llvm.return %[[V]] : !llvm.float
@@ -796,7 +796,7 @@ func @reduce_f64(%arg0: vector<16xf64>) -> f64 {
 // CHECK-LABEL: llvm.func @reduce_f64(
 // CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x double>)
 // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : !llvm.double
-// CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fadd"(%[[C]], %[[A]])
+// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
 // CHECK-SAME: {reassoc = false} : (!llvm.double, !llvm.vec<16 x double>) -> !llvm.double
 // CHECK: llvm.return %[[V]] : !llvm.double
@@ -806,7 +806,7 @@ func @reduce_i8(%arg0: vector<16xi8>) -> i8 {
 }
 // CHECK-LABEL: llvm.func @reduce_i8(
 // CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x i8>)
-// CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.add"(%[[A]])
+// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
 // CHECK: llvm.return %[[V]] : !llvm.i8
 
 func @reduce_i32(%arg0: vector<16xi32>) -> i32 {
@@ -815,7 +815,7 @@ func @reduce_i32(%arg0: vector<16xi32>) -> i32 {
 }
 // CHECK-LABEL: llvm.func @reduce_i32(
 // CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x i32>)
-// CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.add"(%[[A]])
+// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
 // CHECK: llvm.return %[[V]] : !llvm.i32
 
 func @reduce_i64(%arg0: vector<16xi64>) -> i64 {
@@ -824,7 +824,7 @@ func @reduce_i64(%arg0: vector<16xi64>) -> i64 {
 }
 // CHECK-LABEL: llvm.func @reduce_i64(
 // CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x i64>)
-// CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.add"(%[[A]])
+// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
 // CHECK: llvm.return %[[V]] : !llvm.i64
diff --git a/mlir/test/Target/llvmir-intrinsics.mlir b/mlir/test/Target/llvmir-intrinsics.mlir
index 8a598e67d17b8..7ab440a7e5ca7 100644
--- a/mlir/test/Target/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/llvmir-intrinsics.mlir
@@ -182,36 +182,36 @@ llvm.func @smin_test(%arg0: !llvm.i32, %arg1: !llvm.i32, %arg2: !llvm.vec<8 x i3
 // CHECK-LABEL: @vector_reductions
 llvm.func @vector_reductions(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>, %arg2: !llvm.vec<8 x i32>) {
-  // CHECK: call i32 @llvm.experimental.vector.reduce.add.v8i32
-  "llvm.intr.experimental.vector.reduce.add"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
-  // CHECK: call i32 @llvm.experimental.vector.reduce.and.v8i32
-  "llvm.intr.experimental.vector.reduce.and"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
-  // CHECK: call float @llvm.experimental.vector.reduce.fmax.v8f32
-  "llvm.intr.experimental.vector.reduce.fmax"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.float
-  // CHECK: call float @llvm.experimental.vector.reduce.fmin.v8f32
-  "llvm.intr.experimental.vector.reduce.fmin"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.float
-  // CHECK: call i32 @llvm.experimental.vector.reduce.mul.v8i32
-  "llvm.intr.experimental.vector.reduce.mul"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
-  // CHECK: call i32 @llvm.experimental.vector.reduce.or.v8i32
-  "llvm.intr.experimental.vector.reduce.or"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
-  // CHECK: call i32 @llvm.experimental.vector.reduce.smax.v8i32
-  "llvm.intr.experimental.vector.reduce.smax"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
-  // CHECK: call i32 @llvm.experimental.vector.reduce.smin.v8i32
-  "llvm.intr.experimental.vector.reduce.smin"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
-  // CHECK: call i32 @llvm.experimental.vector.reduce.umax.v8i32
-  "llvm.intr.experimental.vector.reduce.umax"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
-  // CHECK: call i32 @llvm.experimental.vector.reduce.umin.v8i32
-  "llvm.intr.experimental.vector.reduce.umin"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
-  // CHECK: call float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32
-  "llvm.intr.experimental.vector.reduce.v2.fadd"(%arg0, %arg1) : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float
-  // CHECK: call float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32
-  "llvm.intr.experimental.vector.reduce.v2.fmul"(%arg0, %arg1) : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float
-  // CHECK: call reassoc float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32
-  "llvm.intr.experimental.vector.reduce.v2.fadd"(%arg0, %arg1) {reassoc = true} : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float
-  // CHECK: call reassoc float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32
-  "llvm.intr.experimental.vector.reduce.v2.fmul"(%arg0, %arg1) {reassoc = true} : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float
-  // CHECK: call i32 @llvm.experimental.vector.reduce.xor.v8i32
-  "llvm.intr.experimental.vector.reduce.xor"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
+  // CHECK: call i32 @llvm.vector.reduce.add.v8i32
+  "llvm.intr.vector.reduce.add"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
+  // CHECK: call i32 @llvm.vector.reduce.and.v8i32
+  "llvm.intr.vector.reduce.and"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
+  // CHECK: call float @llvm.vector.reduce.fmax.v8f32
+  "llvm.intr.vector.reduce.fmax"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.float
+  // CHECK: call float
@llvm.vector.reduce.fmin.v8f32
+  "llvm.intr.vector.reduce.fmin"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.float
+  // CHECK: call i32 @llvm.vector.reduce.mul.v8i32
+  "llvm.intr.vector.reduce.mul"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
+  // CHECK: call i32 @llvm.vector.reduce.or.v8i32
+  "llvm.intr.vector.reduce.or"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
+  // CHECK: call i32 @llvm.vector.reduce.smax.v8i32
+  "llvm.intr.vector.reduce.smax"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
+  // CHECK: call i32 @llvm.vector.reduce.smin.v8i32
+  "llvm.intr.vector.reduce.smin"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
+  // CHECK: call i32 @llvm.vector.reduce.umax.v8i32
+  "llvm.intr.vector.reduce.umax"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
+  // CHECK: call i32 @llvm.vector.reduce.umin.v8i32
+  "llvm.intr.vector.reduce.umin"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
+  // CHECK: call float @llvm.vector.reduce.fadd.f32.v8f32
+  "llvm.intr.vector.reduce.fadd"(%arg0, %arg1) : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float
+  // CHECK: call float @llvm.vector.reduce.fmul.f32.v8f32
+  "llvm.intr.vector.reduce.fmul"(%arg0, %arg1) : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float
+  // CHECK: call reassoc float @llvm.vector.reduce.fadd.f32.v8f32
+  "llvm.intr.vector.reduce.fadd"(%arg0, %arg1) {reassoc = true} : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float
+  // CHECK: call reassoc float @llvm.vector.reduce.fmul.f32.v8f32
+  "llvm.intr.vector.reduce.fmul"(%arg0, %arg1) {reassoc = true} : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float
+  // CHECK: call i32 @llvm.vector.reduce.xor.v8i32
+  "llvm.intr.vector.reduce.xor"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32
   llvm.return
 }

From 69daa368cad34c3cff7e170d2a32652ce31ca9e5 Mon Sep 17 00:00:00 2001
From: Sam McCall
Date: Tue, 6 Oct 2020 12:51:01 +0200
Subject: [PATCH 249/321] [clangd] Disambiguate overloads of std::move for
 header insertion.

Up until now, we relied on matching the filename. This depends on
unstable details of libstdc++ and doesn't work well on other stdlibs.
Also we'd like to remove it (see D88204).

Differential Revision: https://reviews.llvm.org/D88885
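The patch keys the mapping off the symbol itself rather than the file path: the generated table maps std::move to <utility> by default, and the collector overrides that to <algorithm> when the collected signature shows the three-argument overload. Below is a minimal standalone sketch of that heuristic (plain C++, not clangd's actual code; the Symbol struct and mapStdHeader are simplified, hypothetical stand-ins for clangd's Symbol fields and its CanonicalIncludes lookup):

#include <iostream>
#include <string>

// Hypothetical stand-in for the relevant clangd Symbol fields.
struct Symbol {
  std::string Scope;     // e.g. "std::"
  std::string Name;      // e.g. "move"
  std::string Signature; // e.g. "(T &&)" or "(I, I, O)"
};

std::string mapStdHeader(const Symbol &S) {
  // Default mapping, as produced by the generated symbol table: the
  // common one-argument std::move lives in <utility>.
  std::string Canonical =
      (S.Scope == "std::" && S.Name == "move") ? "<utility>" : "";
  // The three-argument algorithm overload is the only std::move whose
  // signature contains a comma, so a comma is enough to disambiguate.
  if (Canonical == "<utility>" &&
      S.Signature.find(',') != std::string::npos)
    Canonical = "<algorithm>";
  return Canonical;
}

int main() {
  std::cout << mapStdHeader({"std::", "move", "(T &&)"}) << "\n";    // <utility>
  std::cout << mapStdHeader({"std::", "move", "(I, I, O)"}) << "\n"; // <algorithm>
}

The comma test is deliberately cheap: no overload resolution is needed, since the one-argument overload's signature can never contain a comma.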
---
 .../clangd/index/CanonicalIncludes.cpp        |  2 ++
 .../clangd/index/SymbolCollector.cpp          | 22 ++++++++++------
 .../clangd/index/SymbolCollector.h            |  2 +-
 .../unittests/CanonicalIncludesTests.cpp      |  6 ++---
 .../clangd/unittests/SymbolCollectorTests.cpp | 25 ++++++++++++++++---
 5 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
index 2822e359c0a50..120f121c7278e 100644
--- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
+++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
@@ -90,6 +90,8 @@ void CanonicalIncludes::addSystemHeadersMapping(const LangOptions &Language) {
   static const auto *Symbols = new llvm::StringMap<llvm::StringRef>({
 #define SYMBOL(Name, NameSpace, Header) {#NameSpace #Name, #Header},
 #include "StdSymbolMap.inc"
+      // There are two std::move()s, this is by far the most common.
+      SYMBOL(move, std::, <utility>)
 #undef SYMBOL
   });
   StdSymbolMapping = Symbols;
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index 2e1f261ab18ae..92ebd90e950c0 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -557,11 +557,9 @@ void SymbolCollector::finish() {
   llvm::SmallString<256> QName;
   for (const auto &Entry : IncludeFiles)
     if (const Symbol *S = Symbols.find(Entry.first)) {
-      QName = S->Scope;
-      QName.append(S->Name);
-      if (auto Header = getIncludeHeader(QName, Entry.second)) {
+      if (auto Header = getIncludeHeader(*S, Entry.second)) {
         Symbol NewSym = *S;
-        NewSym.IncludeHeaders.push_back({*Header, 1});
+        NewSym.IncludeHeaders.push_back({std::move(*Header), 1});
         Symbols.insert(NewSym);
       }
     }
@@ -736,8 +734,8 @@ void SymbolCollector::addDefinition(const NamedDecl &ND,
 /// Gets a canonical include (URI of the header or <header> or "header") for
/// header of \p FID (which should usually be the *expansion* file).
 /// Returns None if includes should not be inserted for this file.
-llvm::Optional<std::string>
-SymbolCollector::getIncludeHeader(llvm::StringRef QName, FileID FID) {
+llvm::Optional<std::string> SymbolCollector::getIncludeHeader(const Symbol &S,
+                                                              FileID FID) {
   const SourceManager &SM = ASTCtx->getSourceManager();
   const FileEntry *FE = SM.getFileEntryForID(FID);
   if (!FE || FE->getName().empty())
@@ -746,10 +744,18 @@ SymbolCollector::getIncludeHeader(llvm::StringRef QName, FileID FID) {
   // If a file is mapped by canonical headers, use that mapping, regardless
   // of whether it's an otherwise-good header (header guards etc).
   if (Opts.Includes) {
+    llvm::SmallString<256> QName = S.Scope;
+    QName.append(S.Name);
     llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
     // If we had a mapping, always use it.
-    if (Canonical.startswith("<") || Canonical.startswith("\""))
+    if (Canonical.startswith("<") || Canonical.startswith("\"")) {
+      // Hack: there are two std::move() overloads from different headers.
+      // CanonicalIncludes returns the common one-arg one from <utility>.
+      if (Canonical == "<utility>" && S.Name == "move" &&
+          S.Signature.contains(','))
+        Canonical = "<algorithm>";
       return Canonical.str();
+    }
     if (Canonical != Filename)
       return toURI(SM, Canonical, Opts);
   }
@@ -757,7 +763,7 @@ SymbolCollector::getIncludeHeader(llvm::StringRef QName, FileID FID) {
   // A .inc or .def file is often included into a real header to define
   // symbols (e.g. LLVM tablegen files).
   if (Filename.endswith(".inc") || Filename.endswith(".def"))
-    return getIncludeHeader(QName, SM.getFileID(SM.getIncludeLoc(FID)));
+    return getIncludeHeader(S, SM.getFileID(SM.getIncludeLoc(FID)));
   // Conservatively refuse to insert #includes to files without guards.
   return llvm::None;
 }
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h
index 9b30aeba95383..a1b40a0dba790 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.h
+++ b/clang-tools-extra/clangd/index/SymbolCollector.h
@@ -131,7 +131,7 @@ class SymbolCollector : public index::IndexDataConsumer {
   void processRelations(const NamedDecl &ND, const SymbolID &ID,
                         ArrayRef<index::SymbolRelation> Relations);
-  llvm::Optional<std::string> getIncludeHeader(llvm::StringRef QName, FileID);
+  llvm::Optional<std::string> getIncludeHeader(const Symbol &S, FileID);
   bool isSelfContainedHeader(FileID);
   // Heuristically headers that only want to be included via an umbrella.
   static bool isDontIncludeMeHeader(llvm::StringRef);
diff --git a/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp b/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
index 7969b638d3d3c..fa96a4579624b 100644
--- a/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
@@ -36,9 +36,9 @@ TEST(CanonicalIncludesTest, CXXStandardLibrary) {
   // Usual standard library symbols are mapped correctly.
   EXPECT_EQ("<vector>", CI.mapHeader("path/vector.h", "std::vector"));
   EXPECT_EQ("<cstdio>", CI.mapHeader("path/stdio.h", "std::printf"));
-  // std::move is ambiguous, currently mapped only based on path
-  EXPECT_EQ("<utility>", CI.mapHeader("libstdc++/bits/move.h", "std::move"));
-  EXPECT_EQ("path/utility.h", CI.mapHeader("path/utility.h", "std::move"));
+  // std::move is ambiguous, currently always mapped to <utility>
+  EXPECT_EQ("<utility>",
+            CI.mapHeader("libstdc++/bits/stl_algo.h", "std::move"));
   // Unknown std symbols aren't mapped.
   EXPECT_EQ("foo/bar.h", CI.mapHeader("foo/bar.h", "std::notathing"));
   // iosfwd declares some symbols it doesn't own.
diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
index 80995baf946f8..47071ac2da38f 100644
--- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -1280,10 +1280,27 @@ TEST_F(SymbolCollectorTest, CanonicalSTLHeader) {
   Language.CPlusPlus = true;
   Includes.addSystemHeadersMapping(Language);
   CollectorOpts.Includes = &Includes;
-  runSymbolCollector("namespace std { class string {}; }", /*Main=*/"");
-  EXPECT_THAT(Symbols,
-              Contains(AllOf(QName("std::string"), DeclURI(TestHeaderURI),
-                             IncludeHeader("<string>"))));
+  runSymbolCollector(
+      R"cpp(
+      namespace std {
+      class string {};
+      // Move overloads have special handling.
+      template <typename T> T&& move(T&&);
+      template <typename I, typename O> O move(I, I, O);
+      }
+      )cpp",
+      /*Main=*/"");
+  for (const auto &S : Symbols)
+    llvm::errs() << S.Scope << S.Name << " in " << S.IncludeHeaders.size()
+                 << "\n";
+  EXPECT_THAT(
+      Symbols,
+      UnorderedElementsAre(
+          QName("std"),
+          AllOf(QName("std::string"), DeclURI(TestHeaderURI),
+                IncludeHeader("<string>")),
+          AllOf(Labeled("move(T &&)"), IncludeHeader("<utility>")),
+          AllOf(Labeled("move(I, I, O)"), IncludeHeader("<algorithm>"))));
 }

 TEST_F(SymbolCollectorTest, IWYUPragma) {
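The next patch renames the sequential reduction SDNodes. The distinction it documents, a fixed left-to-right accumulation starting from an initial value versus an unspecified (for example, tree-shaped) evaluation order, is observable in ordinary floating-point arithmetic. A small illustration in plain C++ (not LLVM code; the inputs are chosen only to make the rounding difference visible, and the exact output assumes IEEE-754 binary32 floats):

#include <cstdio>

int main() {
  float Acc = 1.0e8f;
  float Src[4] = {1.0f, 1.0f, -1.0e8f, 1.0f};

  // VECREDUCE_SEQ_FADD-style evaluation:
  // (((ACC + SRC[0]) + SRC[1]) + SRC[2]) + SRC[3].
  float Seq = Acc;
  for (float V : Src)
    Seq += V;

  // One legal relaxed-order evaluation:
  // ACC + ((SRC[0] + SRC[1]) + (SRC[2] + SRC[3])).
  float Tree = Acc + ((Src[0] + Src[1]) + (Src[2] + Src[3]));

  // Typically prints seq=1.000000 tree=0.000000: the small addends are
  // absorbed by the large accumulator in a different pattern.
  std::printf("seq=%f tree=%f\n", Seq, Tree);
  return 0;
}

This is why the SelectionDAGBuilder change in the patch below emits the relaxed VECREDUCE_FADD/FMUL nodes only when the intrinsic carries the reassoc fast-math flag, and falls back to the SEQ nodes otherwise.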
From e72cfd938f21bd194a2d2f45a4f8ee7d94d33bf8 Mon Sep 17 00:00:00 2001
From: Amara Emerson
Date: Sat, 3 Oct 2020 22:06:54 -0700
Subject: [PATCH 250/321] Rename the VECREDUCE_STRICT_{FADD,FMUL} SDNodes to
 VECREDUCE_SEQ_{FADD,FMUL}.

The STRICT was causing unnecessary confusion. I think SEQ is a more
accurate name for what they actually do, and the other obvious option of
"ORDERED" has the issue of already having a meaning in FP contexts.

Differential Revision: https://reviews.llvm.org/D88791
---
 llvm/include/llvm/CodeGen/ISDOpcodes.h        | 21 +++++++++++++++----
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  4 ++--
 .../SelectionDAG/SelectionDAGDumper.cpp       |  4 ++--
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 19d730738755d..dbe1f0897c13b 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1103,12 +1103,25 @@ enum NodeType {
   /// Generic reduction nodes. These nodes represent horizontal vector
   /// reduction operations, producing a scalar result.
-  /// The STRICT variants perform reductions in sequential order. The first
+  /// The SEQ variants perform reductions in sequential order. The first
   /// operand is an initial scalar accumulator value, and the second operand
   /// is the vector to reduce.
-  VECREDUCE_STRICT_FADD,
-  VECREDUCE_STRICT_FMUL,
-  /// These reductions are non-strict, and have a single vector operand.
+  /// E.g. RES = VECREDUCE_SEQ_FADD f32 ACC, <4 x f32> SRC_VEC
+  ///  ... is equivalent to
+  /// RES = (((ACC + SRC_VEC[0]) + SRC_VEC[1]) + SRC_VEC[2]) + SRC_VEC[3]
+  VECREDUCE_SEQ_FADD,
+  VECREDUCE_SEQ_FMUL,
+
+  /// These reductions have relaxed evaluation order semantics, and have a
+  /// single vector operand. The order of evaluation is unspecified.
For + /// pow-of-2 vectors, one valid legalizer expansion is to use a tree + /// reduction, i.e.: + /// For RES = VECREDUCE_FADD <8 x f16> SRC_VEC + /// PART_RDX = FADD SRC_VEC[0:3], SRC_VEC[4:7] + /// PART_RDX2 = FADD PART_RDX[0:1], PART_RDX[2:3] + /// RES = FADD PART_RDX2[0], PART_RDX2[1] + /// For non-pow-2 vectors, this can be computed by extracting each element + /// and performing the operation as if it were scalarized. VECREDUCE_FADD, VECREDUCE_FMUL, /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2f3601137f832..01972b0968e39 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8943,7 +8943,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags), SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2, SDFlags); + Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::vector_reduce_fmul: if (SDFlags.hasAllowReassociation()) @@ -8951,7 +8951,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags), SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2, SDFlags); + Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::vector_reduce_add: Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index f854a4f4d35f8..1587398ea0229 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -450,9 +450,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETFALSE2: return "setfalse2"; } case ISD::VECREDUCE_FADD: return "vecreduce_fadd"; - case ISD::VECREDUCE_STRICT_FADD: return "vecreduce_strict_fadd"; + case ISD::VECREDUCE_SEQ_FADD: return "vecreduce_seq_fadd"; case ISD::VECREDUCE_FMUL: return "vecreduce_fmul"; - case ISD::VECREDUCE_STRICT_FMUL: return "vecreduce_strict_fmul"; + case ISD::VECREDUCE_SEQ_FMUL: return "vecreduce_seq_fmul"; case ISD::VECREDUCE_ADD: return "vecreduce_add"; case ISD::VECREDUCE_MUL: return "vecreduce_mul"; case ISD::VECREDUCE_AND: return "vecreduce_and"; From 7361ce73efcfba8ac12b7dc1f57a6291ea961c41 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 6 Oct 2020 13:04:11 -0700 Subject: [PATCH 251/321] [AMDGPU] Use default zero flag operands in flat scratch This is no-op so far because we do not select these yet. 
Differential Revision: https://reviews.llvm.org/D88920 --- llvm/lib/Target/AMDGPU/FLATInstructions.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index abe29f73a9141..23df25b69b482 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -238,8 +238,8 @@ class FLAT_Scratch_Load_Pseudo { let has_data = 0; let mayLoad = 1; @@ -254,8 +254,8 @@ class FLAT_Scratch_Store_Pseudo { let mayLoad = 0; let mayStore = 1; From a73166a45204378f6f8b4a6ff2d962f0ff56d51e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 7 Oct 2020 18:50:17 +0100 Subject: [PATCH 252/321] [LAA] Use DL to get element size for bound computation. Currently LAA uses getScalarSizeInBits to compute the size of an element when computing the end bound of an access. This does not work as expected for pointers to pointers, because getScalarSizeInBits will return 0 for pointer types. By using DataLayout to get the size of the element we can also correctly handle pointer element types. Note the changes to the existing test, which seems to also use the wrong offset for the end. Fixes PR47751. Reviewed By: anemet Differential Revision: https://reviews.llvm.org/D88953 --- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 3 ++- .../Transforms/LoopVectorize/X86/pr23997.ll | 22 +++++++++---------- .../runtime-check-pointer-element-type.ll | 4 ++-- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 9d740602a1e80..34de1a052ddfd 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -227,8 +227,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); } // Add the size of the pointed element to ScEnd. 
+ auto &DL = Lp->getHeader()->getModule()->getDataLayout(); unsigned EltSize = - Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8; + DL.getTypeStoreSizeInBits(Ptr->getType()->getPointerElementType()) / 8; const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize); ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); } diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll index 6b61ddbc413a0..63bad59d422e7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll @@ -22,13 +22,13 @@ define void @foo(i8 addrspace(1)* align 8 dereferenceable_or_null(16), i8 addrsp ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP2]], 1 ; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP4]], i64 [[TMP2]], i64 1 ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[UMAX1]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 8 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 16 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP0]], i64 [[TMP6]] ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP1]], i64 [[TMP6]] ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8 addrspace(1)* [[DOT10]], [[SCEVGEP2]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8 addrspace(1)* [[DOT12]], [[SCEVGEP]] -; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[UMAX]], -16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -39,28 +39,28 @@ define void @foo(i8 addrspace(1)* align 8 dereferenceable_or_null(16), i8 addrsp ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP8]], align 8, !alias.scope !0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP7]], i64 4 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP9]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP10]], align 8, !alias.scope !0 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP10]], align 8, !alias.scope !0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP7]], i64 8 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP11]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP12]], align 8, !alias.scope !0 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP12]], align 8, !alias.scope !0 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP7]], i64 12 ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP13]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP14]], align 8, !alias.scope !0 +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP14]], align 8, !alias.scope !0 ; CHECK-NEXT: 
[[TMP15:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT11]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP15]] to <4 x i8 addrspace(1)*> addrspace(1)* ; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP16]], align 8, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP15]], i64 4 ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP17]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD6]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP18]], align 8, !alias.scope !3, !noalias !0 +; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD3]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP18]], align 8, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP15]], i64 8 ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP19]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD7]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP20]], align 8, !alias.scope !3, !noalias !0 +; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD4]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP20]], align 8, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP15]], i64 12 ; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP21]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD8]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP22]], align 8, !alias.scope !3, !noalias !0 +; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD5]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP22]], align 8, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 +; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -75,7 +75,7 @@ define void @foo(i8 addrspace(1)* align 8 dereferenceable_or_null(16), i8 addrsp ; CHECK-NEXT: store i8 addrspace(1)* [[V]], i8 addrspace(1)* addrspace(1)* [[DOT20]], align 8 ; CHECK-NEXT: [[INDVARS_IV_NEXT4]] = add nuw nsw i64 [[INDVARS_IV3]], 1 ; CHECK-NEXT: [[DOT21:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT4]], [[TMP2]] -; CHECK-NEXT: br i1 [[DOT21]], label [[LOOP]], label [[LOOPEXIT]], !llvm.loop !7 +; CHECK-NEXT: br i1 [[DOT21]], label [[LOOP]], label [[LOOPEXIT]], [[LOOP7:!llvm.loop !.*]] ; CHECK: loopexit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll index f9ec18cb6fd88..e055888a37696 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll @@ -12,10 +12,10 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 define void @test(i64 %arg, i32 %arg1, i8** %base) { ; CHECK: LAA: Adding RT 
check for range: ; CHECK-NEXT: Start: ((8 * (zext i32 (-1 + %arg1) to i64)) + (8 * (1 smin %arg)) + (-8 * %arg) + %base) -; CHECK-SAME: End: ((8 * (zext i32 (-1 + %arg1) to i64)) + %base) +; CHECK-SAME: End: (8 + (8 * (zext i32 (-1 + %arg1) to i64)) + %base) ; CHECK-NEXT: LAA: Adding RT check for range: ; CHECK-NEXT: Start: ((8 * (1 smin %arg)) + %base) -; CHECK-SAME: End: ((8 * %arg) + %base) +; CHECK-SAME: End: (8 + (8 * %arg) + %base) ; CHECK: vector.body From dd2f79ed4422860ea9507e17cc33f1262d09db50 Mon Sep 17 00:00:00 2001 From: Edd Dawson Date: Wed, 7 Oct 2020 13:58:12 -0400 Subject: [PATCH 253/321] [test][MC] Use %python in llvm/test/MC/COFF/bigobj.py ... instead of the one on the $PATH. Reviewed By: hubert.reinterpretcast Differential Revision: https://reviews.llvm.org/D88986 --- llvm/test/MC/COFF/bigobj.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/MC/COFF/bigobj.py b/llvm/test/MC/COFF/bigobj.py index 7aab8928ff433..f7c000d20d345 100644 --- a/llvm/test/MC/COFF/bigobj.py +++ b/llvm/test/MC/COFF/bigobj.py @@ -1,4 +1,4 @@ -# RUN: python %s | llvm-mc -filetype=obj -triple i686-pc-win32 - | llvm-readobj -h - | FileCheck %s +# RUN: %python %s | llvm-mc -filetype=obj -triple i686-pc-win32 - | llvm-readobj -h - | FileCheck %s from __future__ import print_function From 45014ce36f28698bb0e84ecad3a3ea7da4f476ad Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 6 Oct 2020 14:07:44 -0700 Subject: [PATCH 254/321] [AMDGPU] Add tied operand to d16 scratch loads This is still no-op because there is no selection for these opcodes. Differential Revision: https://reviews.llvm.org/D88927 --- llvm/lib/Target/AMDGPU/FLATInstructions.td | 31 +++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 23df25b69b482..3dc4bdb861b9d 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -234,12 +234,16 @@ class FLAT_Global_Store_AddTid_Pseudo : FLAT_Pseudo< opName, (outs regClass:$vdst), - !if(EnableSaddr, - (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc), - (ins VGPR_32:$vaddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc)), + !con( + !if(EnableSaddr, + (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), + (ins VGPR_32:$vaddr, flat_offset:$offset)), + !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in), + (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))), " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> { let has_data = 0; let mayLoad = 1; @@ -248,6 +252,9 @@ class FLAT_Scratch_Load_Pseudo : FLAT_Pseudo< @@ -267,10 +274,10 @@ class FLAT_Scratch_Store_Pseudo { +multiclass FLAT_Scratch_Load_Pseudo { let is_flat_scratch = 1 in { - def "" : FLAT_Scratch_Load_Pseudo; - def _SADDR : FLAT_Scratch_Load_Pseudo; + def "" : FLAT_Scratch_Load_Pseudo; + def _SADDR : FLAT_Scratch_Load_Pseudo; } } @@ -681,12 +688,12 @@ defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", V defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>; defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>; -defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32>; -defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32>; -defm SCRATCH_LOAD_SBYTE_D16 
: FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32>; -defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32>; -defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32>; -defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32>; +defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>; +defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>; +defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>; +defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>; +defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>; +defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>; defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>; defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>; From c1247f0e74bff00ab9a896a8132318916f3e84a7 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Wed, 7 Oct 2020 11:19:54 -0700 Subject: [PATCH 255/321] [mlir] Fix build after 322d0afd875df66b36e4810a2b95c20a8f22ab9b due to change in intrinsic overloads. I'd forgottent to run the mlir tests after removing the scalar input overload on the fadd/fmul reductions. This is a quick fix for the mlir bot. --- mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 4 +--- mlir/test/Target/llvmir-intrinsics.mlir | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index 9e9237e82b536..aa7cd460030d4 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -308,9 +308,7 @@ class LLVM_VectorReductionAcc llvm::Function *fn = llvm::Intrinsic::getDeclaration( module, llvm::Intrinsic::vector_reduce_}] # mnem # [{, - { }] # StrJoin.lst, - ListIntSubst.lst)>.result # [{ + { }] # StrJoin.lst>.result # [{ }); auto operands = lookupValues(opInst.getOperands()); llvm::FastMathFlags origFM = builder.getFastMathFlags(); diff --git a/mlir/test/Target/llvmir-intrinsics.mlir b/mlir/test/Target/llvmir-intrinsics.mlir index 7ab440a7e5ca7..ef1ed5acf8240 100644 --- a/mlir/test/Target/llvmir-intrinsics.mlir +++ b/mlir/test/Target/llvmir-intrinsics.mlir @@ -202,13 +202,13 @@ llvm.func @vector_reductions(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>, %a "llvm.intr.vector.reduce.umax"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 // CHECK: call i32 @llvm.vector.reduce.umin.v8i32 "llvm.intr.vector.reduce.umin"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 - // CHECK: call float @llvm.vector.reduce.fadd.f32.v8f32 + // CHECK: call float @llvm.vector.reduce.fadd.v8f32 "llvm.intr.vector.reduce.fadd"(%arg0, %arg1) : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float - // CHECK: call float @llvm.vector.reduce.fmul.f32.v8f32 + // CHECK: call float @llvm.vector.reduce.fmul.v8f32 "llvm.intr.vector.reduce.fmul"(%arg0, %arg1) : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float - // CHECK: call reassoc float @llvm.vector.reduce.fadd.f32.v8f32 + // CHECK: call reassoc float @llvm.vector.reduce.fadd.v8f32 "llvm.intr.vector.reduce.fadd"(%arg0, %arg1) {reassoc = true} : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float - // CHECK: call reassoc float 
@llvm.vector.reduce.fmul.f32.v8f32 + // CHECK: call reassoc float @llvm.vector.reduce.fmul.v8f32 "llvm.intr.vector.reduce.fmul"(%arg0, %arg1) {reassoc = true} : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float // CHECK: call i32 @llvm.vector.reduce.xor.v8i32 "llvm.intr.vector.reduce.xor"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 From 42d91438ad27fda6df9499ae2a99b569fc6e2f75 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 7 Oct 2020 17:27:32 +0100 Subject: [PATCH 256/321] [CodeGen][X86] Cleanup labels on some sse/avx intrinsics tests. NFCI. Add some missing CHECK-LABEL lines. Remove leading '@' so it'll be possible to match against c and c++ builds in a future patch. --- .../X86/avx-builtins-constrained-cmp.c | 8 +-- clang/test/CodeGen/X86/avx-builtins.c | 20 +++--- clang/test/CodeGen/X86/avx-shuffle-builtins.c | 62 +++++++++---------- .../X86/sse-builtins-constrained-cmp.c | 24 +++---- clang/test/CodeGen/X86/sse-builtins.c | 54 ++++++++-------- clang/test/CodeGen/X86/sse2-builtins.c | 4 +- 6 files changed, 86 insertions(+), 86 deletions(-) diff --git a/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c index 1d48204c4acfc..d98862b0c8fff 100644 --- a/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c +++ b/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c @@ -4,7 +4,7 @@ #include __m256d test_mm256_cmp_pd_eq_oq(__m256d a, __m256d b) { - // CHECK-LABEL: @test_mm256_cmp_pd_eq_oq + // CHECK-LABEL: test_mm256_cmp_pd_eq_oq // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") return _mm256_cmp_pd(a, b, _CMP_EQ_OQ); } @@ -196,7 +196,7 @@ __m256d test_mm256_cmp_pd_true_us(__m256d a, __m256d b) { } __m256 test_mm256_cmp_ps_eq_oq(__m256 a, __m256 b) { - // CHECK-LABEL: @test_mm256_cmp_ps_eq_oq + // CHECK-LABEL: test_mm256_cmp_ps_eq_oq // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") return _mm256_cmp_ps(a, b, _CMP_EQ_OQ); } @@ -388,7 +388,7 @@ __m256 test_mm256_cmp_ps_true_us(__m256 a, __m256 b) { } __m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_cmp_pd_eq_oq + // CHECK-LABEL: test_mm_cmp_pd_eq_oq // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") return _mm_cmp_pd(a, b, _CMP_EQ_OQ); } @@ -580,7 +580,7 @@ __m128d test_mm_cmp_pd_true_us(__m128d a, __m128d b) { } __m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_cmp_ps_eq_oq + // CHECK-LABEL: test_mm_cmp_ps_eq_oq // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") return _mm_cmp_ps(a, b, _CMP_EQ_OQ); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index b31a234db2eda..4dfa64396d476 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -214,7 +214,7 @@ __m256 test_mm_ceil_ps(__m256 x) { } __m256d test_mm256_cmp_pd_eq_oq(__m256d a, __m256d b) { - // CHECK-LABEL: @test_mm256_cmp_pd_eq_oq + // CHECK-LABEL: test_mm256_cmp_pd_eq_oq // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}} return _mm256_cmp_pd(a, b, _CMP_EQ_OQ); } @@ -406,7 +406,7 @@ __m256d test_mm256_cmp_pd_true_us(__m256d a, __m256d b) { } __m256 
test_mm256_cmp_ps_eq_oq(__m256 a, __m256 b) { - // CHECK-LABEL: @test_mm256_cmp_ps_eq_oq + // CHECK-LABEL: test_mm256_cmp_ps_eq_oq // CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}} return _mm256_cmp_ps(a, b, _CMP_EQ_OQ); } @@ -598,7 +598,7 @@ __m256 test_mm256_cmp_ps_true_us(__m256 a, __m256 b) { } __m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_cmp_pd_eq_oq + // CHECK-LABEL: test_mm_cmp_pd_eq_oq // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}} return _mm_cmp_pd(a, b, _CMP_EQ_OQ); } @@ -790,7 +790,7 @@ __m128d test_mm_cmp_pd_true_us(__m128d a, __m128d b) { } __m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_cmp_ps_eq_oq + // CHECK-LABEL: test_mm_cmp_ps_eq_oq // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}} return _mm_cmp_ps(a, b, _CMP_EQ_OQ); } @@ -2062,19 +2062,19 @@ int test_mm256_testz_si256(__m256i A, __m256i B) { } __m256 test_mm256_undefined_ps() { - // CHECK-LABEL: @test_mm256_undefined_ps + // CHECK-LABEL: test_mm256_undefined_ps // CHECK: ret <8 x float> zeroinitializer return _mm256_undefined_ps(); } __m256d test_mm256_undefined_pd() { - // CHECK-LABEL: @test_mm256_undefined_pd + // CHECK-LABEL: test_mm256_undefined_pd // CHECK: ret <4 x double> zeroinitializer return _mm256_undefined_pd(); } __m256i test_mm256_undefined_si256() { - // CHECK-LABEL: @test_mm256_undefined_si256 + // CHECK-LABEL: test_mm256_undefined_si256 // CHECK: ret <4 x i64> zeroinitializer return _mm256_undefined_si256(); } @@ -2150,21 +2150,21 @@ __m256i test_mm256_zextsi128_si256(__m128i A) { double test_mm256_cvtsd_f64(__m256d __a) { - // CHECK-LABEL: @test_mm256_cvtsd_f64 + // CHECK-LABEL: test_mm256_cvtsd_f64 // CHECK: extractelement <4 x double> %{{.*}}, i32 0 return _mm256_cvtsd_f64(__a); } int test_mm256_cvtsi256_si32(__m256i __a) { - // CHECK-LABEL: @test_mm256_cvtsi256_si32 + // CHECK-LABEL: test_mm256_cvtsi256_si32 // CHECK: extractelement <8 x i32> %{{.*}}, i32 0 return _mm256_cvtsi256_si32(__a); } float test_mm256_cvtss_f32(__m256 __a) { - // CHECK-LABEL: @test_mm256_cvtss_f32 + // CHECK-LABEL: test_mm256_cvtss_f32 // CHECK: extractelement <8 x float> %{{.*}}, i32 0 return _mm256_cvtss_f32(__a); } diff --git a/clang/test/CodeGen/X86/avx-shuffle-builtins.c b/clang/test/CodeGen/X86/avx-shuffle-builtins.c index 061cad76a5a9a..b1cd2c63ecf01 100644 --- a/clang/test/CodeGen/X86/avx-shuffle-builtins.c +++ b/clang/test/CodeGen/X86/avx-shuffle-builtins.c @@ -5,67 +5,67 @@ #include // -// Test LLVM IR codegen of shuffle instructions +// Test LLVM IR codegen of shuffle instructions, checking if the masks are correct // __m256 x(__m256 a, __m256 b) { - // Check if the mask is correct + // CHECK-LABEL: x // CHECK: shufflevector{{.*}} return _mm256_shuffle_ps(a, b, 203); } __m128d test_mm_permute_pd(__m128d a) { - // Check if the mask is correct + // CHECK-LABEL: test_mm_permute_pd // CHECK: shufflevector{{.*}} return _mm_permute_pd(a, 1); } __m256d test_mm256_permute_pd(__m256d a) { - // Check if the mask is correct + // CHECK-LABEL: test_mm256_permute_pd // CHECK: shufflevector{{.*}} return _mm256_permute_pd(a, 5); } __m128 test_mm_permute_ps(__m128 a) { - // Check if the mask is correct + // CHECK-LABEL: test_mm_permute_ps // CHECK: shufflevector{{.*}} return _mm_permute_ps(a, 0x1b); } // Test case for PR12401 __m128 test_mm_permute_ps2(__m128 a) { - // Check if the mask is correct + // CHECK-LABEL: test_mm_permute_ps2 // CHECK: shufflevector{{.*}} return _mm_permute_ps(a, 0xe6); } __m256 test_mm256_permute_ps(__m256 a) { - // Check if the mask is 
correct + // CHECK-LABEL: test_mm256_permute_ps // CHECK: shufflevector{{.*}} return _mm256_permute_ps(a, 0x1b); } __m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) { - // Check if the mask is correct + // CHECK-LABEL: test_mm256_permute2f128_pd // CHECK: shufflevector{{.*}} return _mm256_permute2f128_pd(a, b, 0x31); } __m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) { - // Check if the mask is correct + // CHECK-LABEL: test_mm256_permute2f128_ps // CHECK: shufflevector{{.*}} return _mm256_permute2f128_ps(a, b, 0x13); } __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) { - // Check if the mask is correct + // CHECK-LABEL: test_mm256_permute2f128_si256 // CHECK: shufflevector{{.*}} <8 x i32> return _mm256_permute2f128_si256(a, b, 0x20); } __m128 test_mm_broadcast_ss(float const *__a) { - // CHECK-LABEL: @test_mm_broadcast_ss + // CHECK-LABEL: test_mm_broadcast_ss // CHECK: insertelement <4 x float> {{.*}}, i32 0 // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> undef, <4 x i32> zeroinitializer return _mm_broadcast_ss(__a); @@ -73,7 +73,7 @@ test_mm_broadcast_ss(float const *__a) { __m256d test_mm256_broadcast_sd(double const *__a) { - // CHECK-LABEL: @test_mm256_broadcast_sd + // CHECK-LABEL: test_mm256_broadcast_sd // CHECK: insertelement <4 x double> {{.*}}, i32 0 // CHECK: shufflevector <4 x double> {{.*}}, <4 x double> undef, <4 x i32> zeroinitializer return _mm256_broadcast_sd(__a); @@ -81,7 +81,7 @@ test_mm256_broadcast_sd(double const *__a) { __m256 test_mm256_broadcast_ss(float const *__a) { - // CHECK-LABEL: @test_mm256_broadcast_ss + // CHECK-LABEL: test_mm256_broadcast_ss // CHECK: insertelement <8 x float> {{.*}}, i32 0 // CHECK: shufflevector <8 x float> {{.*}}, <8 x float> undef, <8 x i32> zeroinitializer return _mm256_broadcast_ss(__a); @@ -90,37 +90,37 @@ test_mm256_broadcast_ss(float const *__a) { // Make sure we have the correct mask for each insertf128 case. __m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) { - // CHECK-LABEL: @test_mm256_insertf128_ps_0 + // CHECK-LABEL: test_mm256_insertf128_ps_0 // CHECK: shufflevector{{.*}} return _mm256_insertf128_ps(a, b, 0); } __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) { - // CHECK-LABEL: @test_mm256_insertf128_pd_0 + // CHECK-LABEL: test_mm256_insertf128_pd_0 // CHECK: shufflevector{{.*}} return _mm256_insertf128_pd(a, b, 0); } __m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) { - // CHECK-LABEL: @test_mm256_insertf128_si256_0 + // CHECK-LABEL: test_mm256_insertf128_si256_0 // CHECK: shufflevector{{.*}} return _mm256_insertf128_si256(a, b, 0); } __m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) { - // CHECK-LABEL: @test_mm256_insertf128_ps_1 + // CHECK-LABEL: test_mm256_insertf128_ps_1 // CHECK: shufflevector{{.*}} return _mm256_insertf128_ps(a, b, 1); } __m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) { - // CHECK-LABEL: @test_mm256_insertf128_pd_1 + // CHECK-LABEL: test_mm256_insertf128_pd_1 // CHECK: shufflevector{{.*}} return _mm256_insertf128_pd(a, b, 1); } __m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) { - // CHECK-LABEL: @test_mm256_insertf128_si256_1 + // CHECK-LABEL: test_mm256_insertf128_si256_1 // CHECK: shufflevector{{.*}} return _mm256_insertf128_si256(a, b, 1); } @@ -128,73 +128,73 @@ __m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) { // Make sure we have the correct mask for each extractf128 case. 
__m128 test_mm256_extractf128_ps_0(__m256 a) { - // CHECK-LABEL: @test_mm256_extractf128_ps_0 + // CHECK-LABEL: test_mm256_extractf128_ps_0 // CHECK: shufflevector{{.*}} return _mm256_extractf128_ps(a, 0); } __m128d test_mm256_extractf128_pd_0(__m256d a) { - // CHECK-LABEL: @test_mm256_extractf128_pd_0 + // CHECK-LABEL: test_mm256_extractf128_pd_0 // CHECK: shufflevector{{.*}} return _mm256_extractf128_pd(a, 0); } __m128i test_mm256_extractf128_si256_0(__m256i a) { - // CHECK-LABEL: @test_mm256_extractf128_si256_0 + // CHECK-LABEL: test_mm256_extractf128_si256_0 // CHECK: shufflevector{{.*}} return _mm256_extractf128_si256(a, 0); } __m128 test_mm256_extractf128_ps_1(__m256 a) { - // CHECK-LABEL: @test_mm256_extractf128_ps_1 + // CHECK-LABEL: test_mm256_extractf128_ps_1 // CHECK: shufflevector{{.*}} return _mm256_extractf128_ps(a, 1); } __m128d test_mm256_extractf128_pd_1(__m256d a) { - // CHECK-LABEL: @test_mm256_extractf128_pd_1 + // CHECK-LABEL: test_mm256_extractf128_pd_1 // CHECK: shufflevector{{.*}} return _mm256_extractf128_pd(a, 1); } __m128i test_mm256_extractf128_si256_1(__m256i a) { - // CHECK-LABEL: @test_mm256_extractf128_si256_1 + // CHECK-LABEL: test_mm256_extractf128_si256_1 // CHECK: shufflevector{{.*}} return _mm256_extractf128_si256(a, 1); } __m256 test_mm256_set_m128(__m128 hi, __m128 lo) { - // CHECK-LABEL: @test_mm256_set_m128 + // CHECK-LABEL: test_mm256_set_m128 // CHECK: shufflevector{{.*}} return _mm256_set_m128(hi, lo); } __m256d test_mm256_set_m128d(__m128d hi, __m128d lo) { - // CHECK-LABEL: @test_mm256_set_m128d + // CHECK-LABEL: test_mm256_set_m128d // CHECK: shufflevector{{.*}} return _mm256_set_m128d(hi, lo); } __m256i test_mm256_set_m128i(__m128i hi, __m128i lo) { - // CHECK-LABEL: @test_mm256_set_m128i + // CHECK-LABEL: test_mm256_set_m128i // CHECK: shufflevector{{.*}} return _mm256_set_m128i(hi, lo); } __m256 test_mm256_setr_m128(__m128 hi, __m128 lo) { - // CHECK-LABEL: @test_mm256_setr_m128 + // CHECK-LABEL: test_mm256_setr_m128 // CHECK: shufflevector{{.*}} return _mm256_setr_m128(lo, hi); } __m256d test_mm256_setr_m128d(__m128d hi, __m128d lo) { - // CHECK-LABEL: @test_mm256_setr_m128d + // CHECK-LABEL: test_mm256_setr_m128d // CHECK: shufflevector{{.*}} return _mm256_setr_m128d(lo, hi); } __m256i test_mm256_setr_m128i(__m128i hi, __m128i lo) { - // CHECK-LABEL: @test_mm256_setr_m128i + // CHECK-LABEL: test_mm256_setr_m128i // CHECK: shufflevector{{.*}} return _mm256_setr_m128i(lo, hi); } diff --git a/clang/test/CodeGen/X86/sse-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/sse-builtins-constrained-cmp.c index 15698b0b92fbf..eb1488885e5dd 100644 --- a/clang/test/CodeGen/X86/sse-builtins-constrained-cmp.c +++ b/clang/test/CodeGen/X86/sse-builtins-constrained-cmp.c @@ -4,7 +4,7 @@ #include __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpeq_ps + // CHECK-LABEL: test_mm_cmpeq_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -13,7 +13,7 @@ __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpge_ps + // CHECK-LABEL: test_mm_cmpge_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata 
!"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -22,7 +22,7 @@ __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpgt_ps + // CHECK-LABEL: test_mm_cmpgt_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -31,7 +31,7 @@ __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmple_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmple_ps + // CHECK-LABEL: test_mm_cmple_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -40,7 +40,7 @@ __m128 test_mm_cmple_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmplt_ps + // CHECK-LABEL: test_mm_cmplt_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -49,7 +49,7 @@ __m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpneq_ps + // CHECK-LABEL: test_mm_cmpneq_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -58,7 +58,7 @@ __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpnge_ps + // CHECK-LABEL: test_mm_cmpnge_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -67,7 +67,7 @@ __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpngt_ps + // CHECK-LABEL: test_mm_cmpngt_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -76,7 +76,7 @@ __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpnle_ps + // CHECK-LABEL: test_mm_cmpnle_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to 
<4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -85,7 +85,7 @@ __m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpnlt_ps + // CHECK-LABEL: test_mm_cmpnlt_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -94,7 +94,7 @@ __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpord_ps + // CHECK-LABEL: test_mm_cmpord_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -103,7 +103,7 @@ __m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpunord_ps + // CHECK-LABEL: test_mm_cmpunord_ps // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 10623fc067dac..ccbeb68e18488 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -35,7 +35,7 @@ __m128 test_mm_andnot_ps(__m128 A, __m128 B) { } __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpeq_ps + // CHECK-LABEL: test_mm_cmpeq_ps // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -44,13 +44,13 @@ __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpeq_ss + // CHECK-LABEL: test_mm_cmpeq_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0) return _mm_cmpeq_ss(__a, __b); } __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpge_ps + // CHECK-LABEL: test_mm_cmpge_ps // CHECK: [[CMP:%.*]] = fcmp ole <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -59,14 +59,14 @@ __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpge_ss + // CHECK-LABEL: test_mm_cmpge_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2) // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_cmpge_ss(__a, __b); } __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpgt_ps + // CHECK-LABEL: test_mm_cmpgt_ps // CHECK: [[CMP:%.*]] = fcmp olt <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -75,14 +75,14 @@ __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) { } __m128 
test_mm_cmpgt_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpgt_ss + // CHECK-LABEL: test_mm_cmpgt_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1) // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_cmpgt_ss(__a, __b); } __m128 test_mm_cmple_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmple_ps + // CHECK-LABEL: test_mm_cmple_ps // CHECK: [[CMP:%.*]] = fcmp ole <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -91,13 +91,13 @@ __m128 test_mm_cmple_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmple_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmple_ss + // CHECK-LABEL: test_mm_cmple_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2) return _mm_cmple_ss(__a, __b); } __m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmplt_ps + // CHECK-LABEL: test_mm_cmplt_ps // CHECK: [[CMP:%.*]] = fcmp olt <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -106,13 +106,13 @@ __m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmplt_ss + // CHECK-LABEL: test_mm_cmplt_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1) return _mm_cmplt_ss(__a, __b); } __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpneq_ps + // CHECK-LABEL: test_mm_cmpneq_ps // CHECK: [[CMP:%.*]] = fcmp une <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -121,13 +121,13 @@ __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpneq_ss + // CHECK-LABEL: test_mm_cmpneq_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4) return _mm_cmpneq_ss(__a, __b); } __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpnge_ps + // CHECK-LABEL: test_mm_cmpnge_ps // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -136,14 +136,14 @@ __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpnge_ss + // CHECK-LABEL: test_mm_cmpnge_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6) // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_cmpnge_ss(__a, __b); } __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpngt_ps + // CHECK-LABEL: test_mm_cmpngt_ps // CHECK: [[CMP:%.*]] = fcmp uge <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -152,14 +152,14 @@ __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpngt_ss + // CHECK-LABEL: test_mm_cmpngt_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5) // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_cmpngt_ss(__a, __b); } __m128 test_mm_cmpnle_ps(__m128 __a, 
__m128 __b) { - // CHECK-LABEL: @test_mm_cmpnle_ps + // CHECK-LABEL: test_mm_cmpnle_ps // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -168,13 +168,13 @@ __m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpnle_ss + // CHECK-LABEL: test_mm_cmpnle_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6) return _mm_cmpnle_ss(__a, __b); } __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpnlt_ps + // CHECK-LABEL: test_mm_cmpnlt_ps // CHECK: [[CMP:%.*]] = fcmp uge <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -183,13 +183,13 @@ __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpnlt_ss + // CHECK-LABEL: test_mm_cmpnlt_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5) return _mm_cmpnlt_ss(__a, __b); } __m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpord_ps + // CHECK-LABEL: test_mm_cmpord_ps // CHECK: [[CMP:%.*]] = fcmp ord <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -198,13 +198,13 @@ __m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpord_ss + // CHECK-LABEL: test_mm_cmpord_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7) return _mm_cmpord_ss(__a, __b); } __m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpunord_ps + // CHECK-LABEL: test_mm_cmpunord_ps // CHECK: [[CMP:%.*]] = fcmp uno <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> @@ -213,7 +213,7 @@ __m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) { } __m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) { - // CHECK-LABEL: @test_mm_cmpunord_ss + // CHECK-LABEL: test_mm_cmpunord_ss // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3) return _mm_cmpunord_ss(__a, __b); } @@ -651,8 +651,8 @@ __m128 test_mm_sqrt_ps(__m128 x) { return _mm_sqrt_ps(x); } -__m128 test_sqrt_ss(__m128 x) { - // CHECK: define {{.*}} @test_sqrt_ss +__m128 test_mm_sqrt_ss(__m128 x) { + // CHECK-LABEL: test_mm_sqrt_ss // CHECK: extractelement <4 x float> {{.*}}, i64 0 // CHECK: call float @llvm.sqrt.f32(float {{.*}}) // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0 @@ -785,7 +785,7 @@ int test_mm_ucomineq_ss(__m128 A, __m128 B) { } __m128 test_mm_undefined_ps() { - // CHECK-LABEL: @test_mm_undefined_ps + // CHECK-LABEL: test_mm_undefined_ps // CHECK: ret <4 x float> zeroinitializer return _mm_undefined_ps(); } diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 180677de03314..23df78892418c 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1629,13 +1629,13 @@ int test_mm_ucomineq_sd(__m128d A, __m128d B) { } __m128d test_mm_undefined_pd() { - // CHECK-LABEL: @test_mm_undefined_pd + // CHECK-LABEL: test_mm_undefined_pd // CHECK: ret <2 x double> zeroinitializer return 
_mm_undefined_pd(); } __m128i test_mm_undefined_si128() { - // CHECK-LABEL: @test_mm_undefined_si128 + // CHECK-LABEL: test_mm_undefined_si128 // CHECK: ret <2 x i64> zeroinitializer return _mm_undefined_si128(); } From e9af30c31e5f1e0430c0f3b25453891410a34191 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 7 Oct 2020 19:01:04 +0100 Subject: [PATCH 257/321] Fix Wdocumentation warnings due to case mismatch. NFCI. --- llvm/include/llvm/CodeGen/CalcSpillWeights.h | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h index 10e9ac866bdb1..a4deefca75652 100644 --- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h +++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h @@ -62,15 +62,15 @@ class VirtRegMap; /// (re)compute li's spill weight and allocation hint. void calculateSpillWeightAndHint(LiveInterval &LI); - /// Compute future expected spill weight of a split artifact of li + /// Compute future expected spill weight of a split artifact of LI /// that will span between start and end slot indexes. - /// \param li The live interval to be split. - /// \param start The expected begining of the split artifact. Instructions + /// \param LI The live interval to be split. + /// \param Start The expected beginning of the split artifact. Instructions /// before start will not affect the weight. - /// \param end The expected end of the split artifact. Instructions + /// \param End The expected end of the split artifact. Instructions /// after end will not affect the weight. /// \return The expected spill weight of the split artifact. Returns - /// negative weight for unspillable li. + /// negative weight for unspillable LI. float futureWeight(LiveInterval &LI, SlotIndex Start, SlotIndex End); /// Compute spill weights and allocation hints for all virtual register @@ -79,17 +79,17 @@ class VirtRegMap; protected: /// Helper function for weight calculations. - /// (Re)compute li's spill weight and allocation hint, or, for non null + /// (Re)compute LI's spill weight and allocation hint, or, for non null /// start and end - compute future expected spill weight of a split - /// artifact of li that will span between start and end slot indexes. - /// \param li The live interval for which to compute the weight. - /// \param start The expected begining of the split artifact. Instructions + /// artifact of LI that will span between start and end slot indexes. + /// \param LI The live interval for which to compute the weight. + /// \param Start The expected beginning of the split artifact. Instructions /// before start will not affect the weight. Relevant for /// weight calculation of future split artifact. - /// \param end The expected end of the split artifact. Instructions + /// \param End The expected end of the split artifact. Instructions /// after end will not affect the weight. Relevant for /// weight calculation of future split artifact. - /// \return The spill weight. Returns negative weight for unspillable li. + /// \return The spill weight. Returns negative weight for unspillable LI. float weightCalcHelper(LiveInterval &LI, SlotIndex *Start = nullptr, SlotIndex *End = nullptr); From 03280055154d09940a72d77f11c76dc7a741ba32 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 7 Oct 2020 19:53:39 +0100 Subject: [PATCH 258/321] Fix MSVC "not all control paths return a value" warning. NFCI. 
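The warning fires on a fully covered switch over an enum: MSVC (warning C4715) does not treat the switch as exhaustive because the enum could, in principle, hold a value outside its enumerators. A minimal standalone sketch of the pattern, using illustrative names rather than the ones in this patch:

#include <cstdlib>

enum class ValType { I32, I64, F32, F64 };

// Covers every enumerator, yet MSVC still reports "not all control
// paths return a value" since VT might hold an out-of-range value.
static char getSig(ValType VT) {
  switch (VT) {
  case ValType::I32: return 'i';
  case ValType::I64: return 'j';
  case ValType::F32: return 'f';
  case ValType::F64: return 'd';
  }
  // An unreachable marker after the switch (llvm_unreachable in LLVM;
  // std::abort in this standalone sketch) satisfies every control path
  // while keeping -Wswitch useful if a new enumerator is added later.
  std::abort();
}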
--- llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 5e4cbdb429888..ad0bbd1100776 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -124,6 +124,7 @@ static char getInvokeSig(wasm::ValType VT) { case wasm::ValType::EXTERNREF: return 'X'; } + llvm_unreachable("Unhandled wasm::ValType enum"); } // Given the wasm signature, generate the invoke name in the format JS glue code From 42ffba051894a66cf6d7b9a357e765a392d983ed Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 7 Oct 2020 12:15:16 -0700 Subject: [PATCH 259/321] Add a clarifying comment on CastInst::isNoopCast I made exactly the mistake described, so document the precondition. It would be better to have an assert, but there is (currently) no "castIsValid" with purely type arguments. --- llvm/include/llvm/IR/InstrTypes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 00aa79c4a2391..c86448ea72cb4 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -650,8 +650,8 @@ class CastInst : public UnaryInstruction { /// DataLayout argument is to determine the pointer size when examining casts /// involving Integer and Pointer types. They are no-op casts if the integer /// is the same size as the pointer. However, pointer size varies with - /// platform. - /// Determine if the described cast is a no-op cast. + /// platform. Note that a precondition of this method is that the cast is + /// legal - i.e. the instruction formed with these operands would verify. static bool isNoopCast( Instruction::CastOps Opcode, ///< Opcode of cast Type *SrcTy, ///< SrcTy of cast From 9c09e2055ee4d4e3b26e393ab460635825a79538 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 7 Oct 2020 21:16:45 +0200 Subject: [PATCH 260/321] [clangd] Add a NewName optional parameter to ClangdServer::prepareRename. If NewName is provided, prepareRename also performs name validation. The motivation is to allow our internal embedders to implement customized "canRenameInto" functionality on top of prepareRename.
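A rough sketch of how an embedder could build "canRenameInto" on top of the extended API. The helper below is hypothetical, and it assumes the callback carries a RenameResult (the template arguments are elided in the diff hunks that follow):

// Hypothetical embedder helper: reports whether the symbol under the
// cursor could be renamed to NewName, without requesting the edits.
void canRenameInto(clangd::ClangdServer &Server, clangd::PathRef File,
                   clangd::Position Pos, std::string NewName,
                   llvm::unique_function<void(bool)> Done) {
  Server.prepareRename(
      File, Pos, /*NewName=*/std::move(NewName), clangd::RenameOptions{},
      [Done = std::move(Done)](
          llvm::Expected<clangd::RenameResult> R) mutable {
        // With NewName set, a failure now also covers invalid names
        // (e.g. renaming to a keyword), not just unrenamable symbols.
        if (!R) {
          llvm::consumeError(R.takeError());
          return Done(false);
        }
        Done(true);
      });
}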
Differential Revision: https://reviews.llvm.org/D88881 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 3 ++- clang-tools-extra/clangd/ClangdServer.cpp | 6 ++++-- clang-tools-extra/clangd/ClangdServer.h | 3 +++ .../clangd/unittests/RenameTests.cpp | 16 ++++++++++++---- clang-tools-extra/clangd/unittests/SyncAPI.cpp | 9 +++++---- clang-tools-extra/clangd/unittests/SyncAPI.h | 1 + 6 files changed, 27 insertions(+), 11 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 34d5a305494c3..e5ea4ccc6b8ce 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -794,7 +794,8 @@ void ClangdLSPServer::onWorkspaceSymbol( void ClangdLSPServer::onPrepareRename(const TextDocumentPositionParams &Params, Callback> Reply) { Server->prepareRename( - Params.textDocument.uri.file(), Params.position, Opts.Rename, + Params.textDocument.uri.file(), Params.position, /*NewName*/ llvm::None, + Opts.Rename, [Reply = std::move(Reply)](llvm::Expected Result) mutable { if (!Result) return Reply(Result.takeError()); diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index d38e115a6796b..68afa49514a9e 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -399,9 +399,11 @@ void ClangdServer::formatOnType(PathRef File, llvm::StringRef Code, } void ClangdServer::prepareRename(PathRef File, Position Pos, + llvm::Optional NewName, const RenameOptions &RenameOpts, Callback CB) { - auto Action = [Pos, File = File.str(), CB = std::move(CB), RenameOpts, + auto Action = [Pos, File = File.str(), CB = std::move(CB), + NewName = std::move(NewName), RenameOpts, this](llvm::Expected InpAST) mutable { if (!InpAST) return CB(InpAST.takeError()); @@ -413,7 +415,7 @@ void ClangdServer::prepareRename(PathRef File, Position Pos, // the cost, thus the result may be incomplete as it only contains // main-file occurrences; auto Results = clangd::rename( - {Pos, /*NewName=*/"__clangd_rename_dummy", InpAST->AST, File, + {Pos, NewName.getValueOr("__clangd_rename_dummy"), InpAST->AST, File, RenameOpts.AllowCrossFile ? nullptr : Index, RenameOpts}); if (!Results) { // LSP says to return null on failure, but that will result in a generic diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index d03f500697463..c52ec007bbdce 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -273,7 +273,10 @@ class ClangdServer { StringRef TriggerText, Callback> CB); /// Test the validity of a rename operation. + /// + /// If NewName is provided, it performs name validation.
void prepareRename(PathRef File, Position Pos, + llvm::Optional NewName, const RenameOptions &RenameOpts, Callback CB); diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index d925dfa36f500..143e8c6ce1ff0 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -730,8 +730,8 @@ TEST(RenameTest, PrepareRename) { runAddDocument(Server, FooHPath, FooH.code()); runAddDocument(Server, FooCCPath, FooCC.code()); - auto Results = - runPrepareRename(Server, FooCCPath, FooCC.point(), {/*CrossFile=*/true}); + auto Results = runPrepareRename(Server, FooCCPath, FooCC.point(), + /*NewName=*/llvm::None, {/*CrossFile=*/true}); // verify that for multi-file rename, we only return main-file occurrences. ASSERT_TRUE(bool(Results)) << Results.takeError(); // We don't know the result is complete in prepareRename (passing a nullptr @@ -740,9 +740,17 @@ TEST(RenameTest, PrepareRename) { EXPECT_THAT(FooCC.ranges(), testing::UnorderedElementsAreArray(Results->LocalChanges)); - // single-file rename on global symbols, we should report an error. + // verify name validation. Results = - runPrepareRename(Server, FooCCPath, FooCC.point(), {/*CrossFile=*/false}); + runPrepareRename(Server, FooCCPath, FooCC.point(), + /*NewName=*/std::string("int"), {/*CrossFile=*/true}); + EXPECT_FALSE(Results); + EXPECT_THAT(llvm::toString(Results.takeError()), + testing::HasSubstr("keyword")); + + // single-file rename on global symbols, we should report an error. + Results = runPrepareRename(Server, FooCCPath, FooCC.point(), + /*NewName=*/llvm::None, {/*CrossFile=*/false}); EXPECT_FALSE(Results); EXPECT_THAT(llvm::toString(Results.takeError()), testing::HasSubstr("is used outside")); diff --git a/clang-tools-extra/clangd/unittests/SyncAPI.cpp b/clang-tools-extra/clangd/unittests/SyncAPI.cpp index 6d6879ab62dbf..27b6cf33e0559 100644 --- a/clang-tools-extra/clangd/unittests/SyncAPI.cpp +++ b/clang-tools-extra/clangd/unittests/SyncAPI.cpp @@ -105,11 +105,12 @@ llvm::Expected runRename(ClangdServer &Server, PathRef File, return std::move(*Result); } -llvm::Expected runPrepareRename(ClangdServer &Server, - PathRef File, Position Pos, - const RenameOptions &RenameOpts) { +llvm::Expected +runPrepareRename(ClangdServer &Server, PathRef File, Position Pos, + llvm::Optional NewName, + const RenameOptions &RenameOpts) { llvm::Optional> Result; - Server.prepareRename(File, Pos, RenameOpts, capture(Result)); + Server.prepareRename(File, Pos, NewName, RenameOpts, capture(Result)); return std::move(*Result); } diff --git a/clang-tools-extra/clangd/unittests/SyncAPI.h b/clang-tools-extra/clangd/unittests/SyncAPI.h index aa641fee91af4..fd0f5dba604de 100644 --- a/clang-tools-extra/clangd/unittests/SyncAPI.h +++ b/clang-tools-extra/clangd/unittests/SyncAPI.h @@ -46,6 +46,7 @@ llvm::Expected runRename(ClangdServer &Server, PathRef File, llvm::Expected runPrepareRename(ClangdServer &Server, PathRef File, Position Pos, + llvm::Optional NewName, const clangd::RenameOptions &RenameOpts); llvm::Expected From ea274be72be1cbc37075412dbc455ef245ac0a75 Mon Sep 17 00:00:00 2001 From: Douglas Yung Date: Wed, 7 Oct 2020 12:23:51 -0700 Subject: [PATCH 261/321] Add REQUIRES: x86-registered-target to test as it was failing on build bots without x86. 
This should fix the failure on http://lab.llvm.org:8011/#/builders/91/builds/30 --- llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test b/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test index 34fc1f8c25e2c..e6efb2b2234cd 100644 --- a/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test +++ b/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test @@ -1,5 +1,6 @@ ; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test-32bit.elf.o --debug-frame | FileCheck %s -check-prefix FRAMES ; Note: the input file was generated from Inputs/dwarfdump-test-32bit.elf.c +; REQUIRES: x86-registered-target ; FRAMES: .debug_frame From a85e43e99676811a25f495169ee9fc9f11815a17 Mon Sep 17 00:00:00 2001 From: Ronak Chauhan Date: Thu, 8 Oct 2020 01:00:10 +0530 Subject: [PATCH 262/321] Remove D80713.diff added in 528057c19755ad842052fba3a42dcbf7deafc6de The diff file was added by mistake. --- D80713.diff | 848 ---------------------------------------------------- 1 file changed, 848 deletions(-) delete mode 100644 D80713.diff diff --git a/D80713.diff b/D80713.diff deleted file mode 100644 index e51f4e02ab783..0000000000000 --- a/D80713.diff +++ /dev/null @@ -1,848 +0,0 @@ -diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h ---- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h -+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h -@@ -162,39 +162,49 @@ - uint8_t reserved2[6]; - }; - -+enum : uint32_t { -+ GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0, -+ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4, -+ RESERVED0_OFFSET = 8, -+ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, -+ RESERVED1_OFFSET = 24, -+ COMPUTE_PGM_RSRC3_OFFSET = 44, -+ COMPUTE_PGM_RSRC1_OFFSET = 48, -+ COMPUTE_PGM_RSRC2_OFFSET = 52, -+ KERNEL_CODE_PROPERTIES_OFFSET = 56, -+ RESERVED2_OFFSET = 58, -+}; -+ - static_assert( - sizeof(kernel_descriptor_t) == 64, - "invalid size for kernel_descriptor_t"); --static_assert( -- offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0, -- "invalid offset for group_segment_fixed_size"); --static_assert( -- offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4, -- "invalid offset for private_segment_fixed_size"); --static_assert( -- offsetof(kernel_descriptor_t, reserved0) == 8, -- "invalid offset for reserved0"); --static_assert( -- offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16, -- "invalid offset for kernel_code_entry_byte_offset"); --static_assert( -- offsetof(kernel_descriptor_t, reserved1) == 24, -- "invalid offset for reserved1"); --static_assert( -- offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44, -- "invalid offset for compute_pgm_rsrc3"); --static_assert( -- offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48, -- "invalid offset for compute_pgm_rsrc1"); --static_assert( -- offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52, -- "invalid offset for compute_pgm_rsrc2"); --static_assert( -- offsetof(kernel_descriptor_t, kernel_code_properties) == 56, -- "invalid offset for kernel_code_properties"); --static_assert( -- offsetof(kernel_descriptor_t, reserved2) == 58, -- "invalid offset for reserved2"); -+static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == -+ GROUP_SEGMENT_FIXED_SIZE_OFFSET, -+ "invalid offset for group_segment_fixed_size"); -+static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == -+ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, -+ "invalid offset for 
private_segment_fixed_size"); -+static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, -+ "invalid offset for reserved0"); -+static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == -+ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, -+ "invalid offset for kernel_code_entry_byte_offset"); -+static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, -+ "invalid offset for reserved1"); -+static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == -+ COMPUTE_PGM_RSRC3_OFFSET, -+ "invalid offset for compute_pgm_rsrc3"); -+static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == -+ COMPUTE_PGM_RSRC1_OFFSET, -+ "invalid offset for compute_pgm_rsrc1"); -+static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == -+ COMPUTE_PGM_RSRC2_OFFSET, -+ "invalid offset for compute_pgm_rsrc2"); -+static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == -+ KERNEL_CODE_PROPERTIES_OFFSET, -+ "invalid offset for kernel_code_properties"); -+static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, -+ "invalid offset for reserved2"); - - } // end namespace amdhsa - } // end namespace llvm -diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h ---- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h -+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h -@@ -17,10 +17,11 @@ - - #include "llvm/ADT/ArrayRef.h" - #include "llvm/MC/MCContext.h" --#include "llvm/MC/MCInstrInfo.h" - #include "llvm/MC/MCDisassembler/MCDisassembler.h" - #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" - #include "llvm/MC/MCDisassembler/MCSymbolizer.h" -+#include "llvm/MC/MCInstrInfo.h" -+#include "llvm/Support/DataExtractor.h" - - #include - #include -@@ -66,6 +67,33 @@ - DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst, - uint64_t Address) const; - -+ Optional onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, -+ ArrayRef Bytes, -+ uint64_t Address, -+ raw_ostream &CStream) const override; -+ -+ DecodeStatus decodeKernelDescriptor(StringRef KdName, ArrayRef Bytes, -+ uint64_t KdAddress) const; -+ -+ DecodeStatus -+ decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, -+ ArrayRef Bytes, -+ raw_string_ostream &KdStream) const; -+ -+ /// Decode as directives that handle COMPUTE_PGM_RSRC1. -+ /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC1. -+ /// \param KdStream - Stream to write the disassembled directives to. -+ // NOLINTNEXTLINE(readability-identifier-naming) -+ DecodeStatus decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, -+ raw_string_ostream &KdStream) const; -+ -+ /// Decode as directives that handle COMPUTE_PGM_RSRC2. -+ /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC2. -+ /// \param KdStream - Stream to write the disassembled directives to. 
-+ // NOLINTNEXTLINE(readability-identifier-naming) -+ DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, -+ raw_string_ostream &KdStream) const; -+ - DecodeStatus convertSDWAInst(MCInst &MI) const; - DecodeStatus convertDPP8Inst(MCInst &MI) const; - DecodeStatus convertMIMGInst(MCInst &MI) const; -diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp ---- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp -+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp -@@ -34,6 +34,7 @@ - #include "llvm/MC/MCFixedLenDisassembler.h" - #include "llvm/MC/MCInst.h" - #include "llvm/MC/MCSubtargetInfo.h" -+#include "llvm/Support/AMDHSAKernelDescriptor.h" - #include "llvm/Support/Endian.h" - #include "llvm/Support/ErrorHandling.h" - #include "llvm/Support/MathExtras.h" -@@ -1215,6 +1216,350 @@ - return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; - } - -+//===----------------------------------------------------------------------===// -+// AMDGPU specific symbol handling -+//===----------------------------------------------------------------------===// -+#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ -+ do { \ -+ KdStream << Indent << DIRECTIVE " " \ -+ << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ -+ } while (0) -+ -+// NOLINTNEXTLINE(readability-identifier-naming) -+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( -+ uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { -+ using namespace amdhsa; -+ StringRef Indent = "\t"; -+ -+ // We cannot accurately backward compute #VGPRs used from -+ // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same -+ // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we -+ // simply calculate the inverse of what the assembler does. -+ -+ uint32_t GranulatedWorkitemVGPRCount = -+ (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> -+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; -+ -+ uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * -+ AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); -+ -+ KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; -+ -+ // We cannot backward compute values used to calculate -+ // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following -+ // directives can't be computed: -+ // .amdhsa_reserve_vcc -+ // .amdhsa_reserve_flat_scratch -+ // .amdhsa_reserve_xnack_mask -+ // They take their respective default values if not specified in the assembly. -+ // -+ // GRANULATED_WAVEFRONT_SGPR_COUNT -+ // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK) -+ // -+ // We compute the inverse as though all directives apart from NEXT_FREE_SGPR -+ // are set to 0. So while disassembling we consider that: -+ // -+ // GRANULATED_WAVEFRONT_SGPR_COUNT -+ // = f(NEXT_FREE_SGPR + 0 + 0 + 0) -+ // -+ // The disassembler cannot recover the original values of those 3 directives. 
-+ -+ uint32_t GranulatedWavefrontSGPRCount = -+ (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> -+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; -+ -+ if (isGFX10() && GranulatedWavefrontSGPRCount) -+ return MCDisassembler::Fail; -+ -+ uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) * -+ AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); -+ -+ KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; -+ KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; -+ KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; -+ KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; -+ -+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY) -+ return MCDisassembler::Fail; -+ -+ PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", -+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); -+ PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64", -+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); -+ PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32", -+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); -+ PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64", -+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); -+ -+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) -+ return MCDisassembler::Fail; -+ -+ PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); -+ -+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) -+ return MCDisassembler::Fail; -+ -+ PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); -+ -+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) -+ return MCDisassembler::Fail; -+ -+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) -+ return MCDisassembler::Fail; -+ -+ PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); -+ -+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) -+ return MCDisassembler::Fail; -+ -+ if (isGFX10()) { -+ PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", -+ COMPUTE_PGM_RSRC1_WGP_MODE); -+ PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); -+ PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); -+ } -+ return MCDisassembler::Success; -+} -+ -+// NOLINTNEXTLINE(readability-identifier-naming) -+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( -+ uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { -+ using namespace amdhsa; -+ StringRef Indent = "\t"; -+ PRINT_DIRECTIVE( -+ ".amdhsa_system_sgpr_private_segment_wavefront_offset", -+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); -+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x", -+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); -+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", -+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); -+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z", -+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); -+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info", -+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); -+ PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id", -+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); -+ -+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH) -+ return MCDisassembler::Fail; -+ -+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY) -+ return MCDisassembler::Fail; -+ -+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE) -+ return MCDisassembler::Fail; -+ -+ PRINT_DIRECTIVE( -+ ".amdhsa_exception_fp_ieee_invalid_op", -+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); -+ PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src", -+ 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); -+ PRINT_DIRECTIVE( -+ ".amdhsa_exception_fp_ieee_div_zero", -+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); -+ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow", -+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); -+ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow", -+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); -+ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact", -+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); -+ PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero", -+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); -+ -+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0) -+ return MCDisassembler::Fail; -+ -+ return MCDisassembler::Success; -+} -+ -+#undef PRINT_DIRECTIVE -+ -+MCDisassembler::DecodeStatus -+AMDGPUDisassembler::decodeKernelDescriptorDirective( -+ DataExtractor::Cursor &Cursor, ArrayRef Bytes, -+ raw_string_ostream &KdStream) const { -+#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ -+ do { \ -+ KdStream << Indent << DIRECTIVE " " \ -+ << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ -+ } while (0) -+ -+ uint16_t TwoByteBuffer = 0; -+ uint32_t FourByteBuffer = 0; -+ uint64_t EightByteBuffer = 0; -+ -+ StringRef ReservedBytes; -+ StringRef Indent = "\t"; -+ -+ assert(Bytes.size() == 64); -+ DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8); -+ -+ switch (Cursor.tell()) { -+ case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET: -+ FourByteBuffer = DE.getU32(Cursor); -+ KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer -+ << '\n'; -+ return MCDisassembler::Success; -+ -+ case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET: -+ FourByteBuffer = DE.getU32(Cursor); -+ KdStream << Indent << ".amdhsa_private_segment_fixed_size " -+ << FourByteBuffer << '\n'; -+ return MCDisassembler::Success; -+ -+ case amdhsa::RESERVED0_OFFSET: -+ // 8 reserved bytes, must be 0. -+ EightByteBuffer = DE.getU64(Cursor); -+ if (EightByteBuffer) { -+ return MCDisassembler::Fail; -+ } -+ return MCDisassembler::Success; -+ -+ case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET: -+ // KERNEL_CODE_ENTRY_BYTE_OFFSET -+ // So far no directive controls this for Code Object V3, so simply skip for -+ // disassembly. -+ DE.skip(Cursor, 8); -+ return MCDisassembler::Success; -+ -+ case amdhsa::RESERVED1_OFFSET: -+ // 20 reserved bytes, must be 0. -+ ReservedBytes = DE.getBytes(Cursor, 20); -+ for (int I = 0; I < 20; ++I) { -+ if (ReservedBytes[I] != 0) { -+ return MCDisassembler::Fail; -+ } -+ } -+ return MCDisassembler::Success; -+ -+ case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: -+ // COMPUTE_PGM_RSRC3 -+ // - Only set for GFX10, GFX6-9 have this to be 0. -+ // - Currently no directives directly control this. 
-+ FourByteBuffer = DE.getU32(Cursor); -+ if (!isGFX10() && FourByteBuffer) { -+ return MCDisassembler::Fail; -+ } -+ return MCDisassembler::Success; -+ -+ case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: -+ FourByteBuffer = DE.getU32(Cursor); -+ if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == -+ MCDisassembler::Fail) { -+ return MCDisassembler::Fail; -+ } -+ return MCDisassembler::Success; -+ -+ case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: -+ FourByteBuffer = DE.getU32(Cursor); -+ if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == -+ MCDisassembler::Fail) { -+ return MCDisassembler::Fail; -+ } -+ return MCDisassembler::Success; -+ -+ case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: -+ using namespace amdhsa; -+ TwoByteBuffer = DE.getU16(Cursor); -+ -+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", -+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); -+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr", -+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); -+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", -+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); -+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr", -+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); -+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id", -+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); -+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", -+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); -+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", -+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); -+ -+ if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0) -+ return MCDisassembler::Fail; -+ -+ // Reserved for GFX9 -+ if (isGFX9() && -+ (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { -+ return MCDisassembler::Fail; -+ } else if (isGFX10()) { -+ PRINT_DIRECTIVE(".amdhsa_wavefront_size32", -+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); -+ } -+ -+ if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) -+ return MCDisassembler::Fail; -+ -+ return MCDisassembler::Success; -+ -+ case amdhsa::RESERVED2_OFFSET: -+ // 6 bytes from here are reserved, must be 0. -+ ReservedBytes = DE.getBytes(Cursor, 6); -+ for (int I = 0; I < 6; ++I) { -+ if (ReservedBytes[I] != 0) -+ return MCDisassembler::Fail; -+ } -+ return MCDisassembler::Success; -+ -+ default: -+ llvm_unreachable("Unhandled index. Case statements cover everything."); -+ return MCDisassembler::Fail; -+ } -+#undef PRINT_DIRECTIVE -+} -+ -+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( -+ StringRef KdName, ArrayRef Bytes, uint64_t KdAddress) const { -+ // CP microcode requires the kernel descriptor to be 64 aligned. -+ if (Bytes.size() != 64 || KdAddress % 64 != 0) -+ return MCDisassembler::Fail; -+ -+ std::string Kd; -+ raw_string_ostream KdStream(Kd); -+ KdStream << ".amdhsa_kernel " << KdName << '\n'; -+ -+ DataExtractor::Cursor C(0); -+ while (C && C.tell() < Bytes.size()) { -+ MCDisassembler::DecodeStatus Status = -+ decodeKernelDescriptorDirective(C, Bytes, KdStream); -+ -+ cantFail(C.takeError()); -+ -+ if (Status == MCDisassembler::Fail) -+ return MCDisassembler::Fail; -+ } -+ KdStream << ".end_amdhsa_kernel\n"; -+ outs() << KdStream.str(); -+ return MCDisassembler::Success; -+} -+ -+Optional -+AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, -+ ArrayRef Bytes, uint64_t Address, -+ raw_ostream &CStream) const { -+ // Right now only kernel descriptor needs to be handled. -+ // We ignore all other symbols for target specific handling. 
-+ // TODO: -+ // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code -+ // Object V2 and V3 when symbols are marked protected. -+ -+ // amd_kernel_code_t for Code Object V2. -+ if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) { -+ Size = 256; -+ return MCDisassembler::Fail; -+ } -+ -+ // Code Object V3 kernel descriptors. -+ StringRef Name = Symbol.Name; -+ if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) { -+ Size = 64; // Size = 64 regardless of success or failure. -+ return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); -+ } -+ return None; -+} -+ - //===----------------------------------------------------------------------===// - // AMDGPUSymbolizer - //===----------------------------------------------------------------------===// -diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll ---- a/llvm/test/CodeGen/AMDGPU/nop-data.ll -+++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll -@@ -1,7 +1,7 @@ - ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-code-object-v3 -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s - - ; CHECK: : --; CHECK-NEXT: s_endpgm -+; CHECK: s_endpgm - define amdgpu_kernel void @kernel0() align 256 { - entry: - ret void -@@ -80,7 +80,7 @@ - - ; CHECK-EMPTY: - ; CHECK-NEXT: : --; CHECK-NEXT: s_endpgm -+; CHECK: s_endpgm - define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) align 256 { - entry: - ret void -diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s -new file mode 100644 ---- /dev/null -+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s -@@ -0,0 +1,37 @@ -+;; Failure test. We create a malformed kernel descriptor (KD) by manually -+;; setting the bytes, because one can't create a malformed KD using the -+;; assembler directives. -+ -+; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t.o -+ -+; RUN: printf ".type my_kernel.kd, @object \nmy_kernel.kd:\n.size my_kernel.kd, 64\n" > %t1.sym_info -+; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t.o \ -+; RUN: | tail -n +9 > %t1.sym_content -+; RUN: cat %t1.sym_info %t1.sym_content > %t1.s -+ -+; RUN: llvm-mc %t1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t-re-assemble.o -+; RUN: diff %t.o %t-re-assemble.o -+ -+;; Test failure by setting one of the reserved bytes to non-zero value. -+ -+.type my_kernel.kd, @object -+.size my_kernel.kd, 64 -+my_kernel.kd: -+ .long 0x00000000 ;; group_segment_fixed_size -+ .long 0x00000000 ;; private_segment_fixed_size -+ .quad 0x00FF000000000000 ;; reserved bytes. -+ .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. -+ -+ ;; 20 reserved bytes. -+ .quad 0x0000000000000000 -+ .quad 0x0000000000000000 -+ .long 0x00000000 -+ -+ .long 0x00000000 ;; compute_PGM_RSRC3 -+ .long 0x00000000 ;; compute_PGM_RSRC1 -+ .long 0x00000000 ;; compute_PGM_RSRC2 -+ .short 0x0000 ;; additional fields. -+ -+ ;; 6 reserved bytes. -+ .long 0x0000000 -+ .short 0x0000 -diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s -new file mode 100644 ---- /dev/null -+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s -@@ -0,0 +1,49 @@ -+;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor. 
-+ -+; RUN: split-file %s %t.dir -+ -+; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -+; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ -+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble -+; RUN: diff %t1 %t1-re-assemble -+ -+; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -+; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ -+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble -+; RUN: diff %t2 %t2-re-assemble -+ -+; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 -+; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ -+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble -+; RUN: diff %t3 %t3-re-assemble -+ -+ -+;--- 1.s -+;; Only set next_free_sgpr. -+.amdhsa_kernel my_kernel_1 -+ .amdhsa_next_free_vgpr 0 -+ .amdhsa_next_free_sgpr 42 -+ .amdhsa_reserve_flat_scratch 0 -+ .amdhsa_reserve_xnack_mask 0 -+ .amdhsa_reserve_vcc 0 -+.end_amdhsa_kernel -+ -+;--- 2.s -+;; Only set other directives. -+.amdhsa_kernel my_kernel_2 -+ .amdhsa_next_free_vgpr 0 -+ .amdhsa_next_free_sgpr 0 -+ .amdhsa_reserve_flat_scratch 1 -+ .amdhsa_reserve_xnack_mask 1 -+ .amdhsa_reserve_vcc 1 -+.end_amdhsa_kernel -+ -+;--- 3.s -+;; Set all affecting directives. -+.amdhsa_kernel my_kernel_3 -+ .amdhsa_next_free_vgpr 0 -+ .amdhsa_next_free_sgpr 35 -+ .amdhsa_reserve_flat_scratch 1 -+ .amdhsa_reserve_xnack_mask 1 -+ .amdhsa_reserve_vcc 1 -+.end_amdhsa_kernel -diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s -new file mode 100644 ---- /dev/null -+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s -@@ -0,0 +1,36 @@ -+;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor. 
-+ -+; RUN: split-file %s %t.dir -+ -+; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -+; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ -+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble -+; RUN: diff %t1 %t1-re-assemble -+ -+; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -+; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ -+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble -+; RUN: diff %t2 %t2-re-assemble -+ -+; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 -+; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ -+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble -+; RUN: diff %t3 %t3-re-assemble -+ -+;--- 1.s -+.amdhsa_kernel my_kernel_1 -+ .amdhsa_next_free_vgpr 23 -+ .amdhsa_next_free_sgpr 0 -+.end_amdhsa_kernel -+ -+;--- 2.s -+.amdhsa_kernel my_kernel_2 -+ .amdhsa_next_free_vgpr 14 -+ .amdhsa_next_free_sgpr 0 -+.end_amdhsa_kernel -+ -+;--- 3.s -+.amdhsa_kernel my_kernel_3 -+ .amdhsa_next_free_vgpr 32 -+ .amdhsa_next_free_sgpr 0 -+.end_amdhsa_kernel -diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s -new file mode 100644 ---- /dev/null -+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s -@@ -0,0 +1,58 @@ -+;; Entirely zeroed kernel descriptor (for GFX10). -+ -+; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t -+; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s -+ -+;; TODO: -+;; This file and kd-zeroed-raw.s should produce the same output for the kernel -+;; descriptor - a block of 64 zeroed bytes. But looks like the assembler sets -+;; the FWD_PROGRESS bit in COMPUTE_PGM_RSRC1 to 1 even when the directive -+;; mentions 0 (see line 36). -+ -+;; Check the raw bytes right now. 
-+ -+; OBJDUMP: 0000 00000000 00000000 00000000 00000000 -+; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -+; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -+; OBJDUMP-NEXT: 0030 01000000 00000000 00000000 00000000 -+ -+.amdhsa_kernel my_kernel -+ .amdhsa_group_segment_fixed_size 0 -+ .amdhsa_private_segment_fixed_size 0 -+ .amdhsa_next_free_vgpr 8 -+ .amdhsa_reserve_vcc 0 -+ .amdhsa_reserve_flat_scratch 0 -+ .amdhsa_reserve_xnack_mask 0 -+ .amdhsa_next_free_sgpr 8 -+ .amdhsa_float_round_mode_32 0 -+ .amdhsa_float_round_mode_16_64 0 -+ .amdhsa_float_denorm_mode_32 0 -+ .amdhsa_float_denorm_mode_16_64 0 -+ .amdhsa_dx10_clamp 0 -+ .amdhsa_ieee_mode 0 -+ .amdhsa_fp16_overflow 0 -+ .amdhsa_workgroup_processor_mode 0 -+ .amdhsa_memory_ordered 0 -+ .amdhsa_forward_progress 0 -+ .amdhsa_system_sgpr_private_segment_wavefront_offset 0 -+ .amdhsa_system_sgpr_workgroup_id_x 0 -+ .amdhsa_system_sgpr_workgroup_id_y 0 -+ .amdhsa_system_sgpr_workgroup_id_z 0 -+ .amdhsa_system_sgpr_workgroup_info 0 -+ .amdhsa_system_vgpr_workitem_id 0 -+ .amdhsa_exception_fp_ieee_invalid_op 0 -+ .amdhsa_exception_fp_denorm_src 0 -+ .amdhsa_exception_fp_ieee_div_zero 0 -+ .amdhsa_exception_fp_ieee_overflow 0 -+ .amdhsa_exception_fp_ieee_underflow 0 -+ .amdhsa_exception_fp_ieee_inexact 0 -+ .amdhsa_exception_int_div_zero 0 -+ .amdhsa_user_sgpr_private_segment_buffer 0 -+ .amdhsa_user_sgpr_dispatch_ptr 0 -+ .amdhsa_user_sgpr_queue_ptr 0 -+ .amdhsa_user_sgpr_kernarg_segment_ptr 0 -+ .amdhsa_user_sgpr_dispatch_id 0 -+ .amdhsa_user_sgpr_flat_scratch_init 0 -+ .amdhsa_user_sgpr_private_segment_size 0 -+ .amdhsa_wavefront_size32 0 -+.end_amdhsa_kernel -diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s -new file mode 100644 ---- /dev/null -+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s -@@ -0,0 +1,53 @@ -+;; Entirely zeroed kernel descriptor (for GFX9). -+ -+; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -+; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ -+; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -+; RUN: diff %t1 %t2 -+ -+; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s -+ -+; OBJDUMP: 0000 00000000 00000000 00000000 00000000 -+; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -+; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -+; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 -+ -+;; This file and kd-zeroed-raw.s produce the same output for the kernel -+;; descriptor - a block of 64 zeroed bytes. 
-+ -+.amdhsa_kernel my_kernel -+ .amdhsa_group_segment_fixed_size 0 -+ .amdhsa_private_segment_fixed_size 0 -+ .amdhsa_next_free_vgpr 0 -+ .amdhsa_reserve_vcc 0 -+ .amdhsa_reserve_flat_scratch 0 -+ .amdhsa_reserve_xnack_mask 0 -+ .amdhsa_next_free_sgpr 0 -+ .amdhsa_float_round_mode_32 0 -+ .amdhsa_float_round_mode_16_64 0 -+ .amdhsa_float_denorm_mode_32 0 -+ .amdhsa_float_denorm_mode_16_64 0 -+ .amdhsa_dx10_clamp 0 -+ .amdhsa_ieee_mode 0 -+ .amdhsa_fp16_overflow 0 -+ .amdhsa_system_sgpr_private_segment_wavefront_offset 0 -+ .amdhsa_system_sgpr_workgroup_id_x 0 -+ .amdhsa_system_sgpr_workgroup_id_y 0 -+ .amdhsa_system_sgpr_workgroup_id_z 0 -+ .amdhsa_system_sgpr_workgroup_info 0 -+ .amdhsa_system_vgpr_workitem_id 0 -+ .amdhsa_exception_fp_ieee_invalid_op 0 -+ .amdhsa_exception_fp_denorm_src 0 -+ .amdhsa_exception_fp_ieee_div_zero 0 -+ .amdhsa_exception_fp_ieee_overflow 0 -+ .amdhsa_exception_fp_ieee_underflow 0 -+ .amdhsa_exception_fp_ieee_inexact 0 -+ .amdhsa_exception_int_div_zero 0 -+ .amdhsa_user_sgpr_private_segment_buffer 0 -+ .amdhsa_user_sgpr_dispatch_ptr 0 -+ .amdhsa_user_sgpr_queue_ptr 0 -+ .amdhsa_user_sgpr_kernarg_segment_ptr 0 -+ .amdhsa_user_sgpr_dispatch_id 0 -+ .amdhsa_user_sgpr_flat_scratch_init 0 -+ .amdhsa_user_sgpr_private_segment_size 0 -+.end_amdhsa_kernel -diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s -new file mode 100644 ---- /dev/null -+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s -@@ -0,0 +1,41 @@ -+; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -+; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ -+; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -+; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s -+ -+;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details). -+;; kd-zeroed-raw.s and kd-zeroed-*.s should produce the same output for the -+;; kernel descriptor - a block of 64 zeroed bytes. -+ -+;; The disassembly will produce the contents of kd-zeroed-*.s which on being -+;; assembled contains additional relocation info. A diff over the entire object -+;; will fail in this case. So we check by looking the bytes in .text. -+ -+; OBJDUMP: 0000 00000000 00000000 00000000 00000000 -+; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -+; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -+; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 -+ -+;; The entire object is zeroed out. -+ -+.type my_kernel.kd, @object -+.size my_kernel.kd, 64 -+my_kernel.kd: -+ .long 0x00000000 ;; group_segment_fixed_size -+ .long 0x00000000 ;; private_segment_fixed_size -+ .quad 0x0000000000000000 ;; reserved bytes. -+ .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. -+ -+ ;; 20 reserved bytes. -+ .quad 0x0000000000000000 -+ .quad 0x0000000000000000 -+ .long 0x00000000 -+ -+ .long 0x00000000 ;; compute_PGM_RSRC3 -+ .long 0x00000000 ;; compute_PGM_RSRC1 -+ .long 0x00000000 ;; compute_PGM_RSRC2 -+ .short 0x0000 ;; additional fields. -+ -+ ;; 6 reserved bytes. 
-+ .long 0x0000000 -+ .short 0x0000 -diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp ---- a/llvm/tools/llvm-objdump/llvm-objdump.cpp -+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp -@@ -1854,23 +1854,6 @@ - outs() << SectionName << ":\n"; - } - -- if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { -- if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) { -- // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) -- Start += 256; -- } -- if (SI == SE - 1 || -- Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) { -- // cut trailing zeroes at the end of kernel -- // cut up to 256 bytes -- const uint64_t EndAlign = 256; -- const auto Limit = End - (std::min)(EndAlign, End - Start); -- while (End > Limit && -- *reinterpret_cast(&Bytes[End - 4]) == 0) -- End -= 4; -- } -- } -- - outs() << '\n'; - if (!NoLeadingAddr) - outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", - From fe0197e194a64f950602fb50736b6648a9e5b2a9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 7 Oct 2020 20:59:31 +0100 Subject: [PATCH 263/321] [InstCombine] Add checks for and(logicalshift(zext(x),undef),y) cases Prep work before some cleanup in narrowMaskedBinOp --- .../test/Transforms/InstCombine/and-narrow.ll | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/and-narrow.ll b/llvm/test/Transforms/InstCombine/and-narrow.ll index a8661a9f79657..d53aef9ce97c6 100644 --- a/llvm/test/Transforms/InstCombine/and-narrow.ll +++ b/llvm/test/Transforms/InstCombine/and-narrow.ll @@ -175,6 +175,16 @@ define <2 x i16> @zext_lshr_vec_overshift(<2 x i8> %x) { ret <2 x i16> %r } +define <2 x i16> @zext_lshr_vec_undef(<2 x i8> %x) { +; CHECK-LABEL: @zext_lshr_vec_undef( +; CHECK-NEXT: ret <2 x i16> zeroinitializer +; + %z = zext <2 x i8> %x to <2 x i16> + %b = lshr <2 x i16> %z, undef + %r = and <2 x i16> %b, %z + ret <2 x i16> %r +} + ; Don't create poison by narrowing a shift below the shift amount. define <2 x i16> @zext_shl_vec_overshift(<2 x i8> %x) { @@ -190,3 +200,13 @@ define <2 x i16> @zext_shl_vec_overshift(<2 x i8> %x) { ret <2 x i16> %r } +define <2 x i16> @zext_shl_vec_undef(<2 x i8> %x) { +; CHECK-LABEL: @zext_shl_vec_undef( +; CHECK-NEXT: ret <2 x i16> zeroinitializer +; + %z = zext <2 x i8> %x to <2 x i16> + %b = shl <2 x i16> %z, undef + %r = and <2 x i16> %b, %z + ret <2 x i16> %r +} + From 365ef499d6005d5842d5e87f9dafe63c2508c881 Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Wed, 7 Oct 2020 14:36:00 -0500 Subject: [PATCH 264/321] [SVE] Add legalisation tests to sve-fixed-length-fp-reduce.ll --- .../AArch64/sve-fixed-length-fp-reduce.ll | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll index 10aca253e4e6a..e38d18a9e4633 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll @@ -59,6 +59,15 @@ define half @fmaxv_v32f16(<32 x half>* %a) #0 { ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] ; VBITS_GE_512-NEXT: fmaxnmv h0, [[PG]], [[OP]].h ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: fmaxnmv h0, [[PG]], [[MAX]].h +; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a %res = call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %op) ret half %res @@ -121,6 +130,15 @@ define float @fmaxv_v16f32(<16 x float>* %a) #0 { ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] ; VBITS_GE_512-NEXT: fmaxnmv s0, [[PG]], [[OP]].s ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: fmaxnmv s0, [[PG]], [[MAX]].s +; VBITS_EQ_256-NEXT: ret %op = load <16 x float>, <16 x float>* %a %res = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %op) ret float %res @@ -183,6 +201,15 @@ define double @fmaxv_v8f64(<8 x double>* %a) #0 { ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] ; VBITS_GE_512-NEXT: fmaxnmv d0, [[PG]], [[OP]].d ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: fmaxnmv d0, [[PG]], [[MAX]].d +; VBITS_EQ_256-NEXT: ret %op = load <8 x double>, <8 x double>* %a %res = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %op) ret double %res @@ -249,6 +276,15 @@ define half @fminv_v32f16(<32 x half>* %a) #0 { ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] ; VBITS_GE_512-NEXT: fminnmv h0, [[PG]], [[OP]].h ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: fminnmv h0, [[PG]], [[MIN]].h +; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a %res = call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %op) ret half %res @@ -311,6 +347,15 @@ define float @fminv_v16f32(<16 x float>* %a) #0 { ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] ; VBITS_GE_512-NEXT: fminnmv s0, [[PG]], [[OP]].s ; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
+; VBITS_EQ_256-DAG: fminnmv s0, [[PG]], [[MIN]].s
+; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %op)
  ret float %res
@@ -373,6 +418,15 @@ define double @fminv_v8f64(<8 x double>* %a) #0 {
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: fminnmv d0, [[PG]], [[OP]].d
; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
+; VBITS_EQ_256-DAG: fminnmv d0, [[PG]], [[MIN]].d
+; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %op)
  ret double %res

From 34cd06a9b3bddaa7a989c606bbf1327ee651711c Mon Sep 17 00:00:00 2001
From: Rahman Lavaee
Date: Wed, 7 Oct 2020 13:21:20 -0700
Subject: [PATCH 265/321] [BasicBlockSections] Make sure that the labels for
 address-taken blocks are emitted after switching the section.

Currently, the AsmPrinter code emits the labels of address-taken blocks
in the previous section, which makes the relocation incorrect. This patch
reorganizes the code to switch to the basic block section before handling
address-taken blocks.

Reviewed By: snehasish, MaskRay

Differential Revision: https://reviews.llvm.org/D88517
---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 36 ++++++++++---------
 ...basic-block-sections-blockaddress-taken.ll | 35 ++++++++++++++++++
 2 files changed, 55 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-blockaddress-taken.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 613e7ebff2dfd..f45f8b7cb9603 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -3047,6 +3047,16 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
   if (Alignment != Align(1))
     emitAlignment(Alignment);

+  // Switch to a new section if this basic block must begin a section. The
+  // entry block is always placed in the function section and is handled
+  // separately.
+  if (MBB.isBeginSection() && !MBB.pred_empty()) {
+    OutStreamer->SwitchSection(
+        getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(),
+                                                            MBB, TM));
+    CurrentSectionBeginSym = MBB.getSymbol();
+  }
+
   // If the block has its address taken, emit any labels that were used to
   // reference the block. It is possible that there is more than one label
   // here, because multiple LLVM BB's may have been RAUW'd to this block after
@@ -3077,6 +3087,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
       emitBasicBlockLoopComments(MBB, MLI, *this);
   }

+  // Print the main label for the block.
if (MBB.pred_empty() || (!MF->hasBBLabels() && isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry() && !MBB.hasLabelMustBeEmitted())) { @@ -3086,24 +3097,17 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { false); } } else { - if (isVerbose() && MBB.hasLabelMustBeEmitted()) { + if (isVerbose() && MBB.hasLabelMustBeEmitted()) OutStreamer->AddComment("Label of block must be emitted"); - } - auto *BBSymbol = MBB.getSymbol(); - // Switch to a new section if this basic block must begin a section. - if (MBB.isBeginSection()) { - OutStreamer->SwitchSection( - getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(), - MBB, TM)); - CurrentSectionBeginSym = BBSymbol; - } - OutStreamer->emitLabel(BBSymbol); - // With BB sections, each basic block must handle CFI information on its own - // if it begins a section. - if (MBB.isBeginSection()) - for (const HandlerInfo &HI : Handlers) - HI.Handler->beginBasicBlock(MBB); + OutStreamer->emitLabel(MBB.getSymbol()); } + + // With BB sections, each basic block must handle CFI information on its own + // if it begins a section (Entry block is handled separately by + // AsmPrinterHandler::beginFunction). + if (MBB.isBeginSection() && !MBB.pred_empty()) + for (const HandlerInfo &HI : Handlers) + HI.Handler->beginBasicBlock(MBB); } void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { diff --git a/llvm/test/CodeGen/X86/basic-block-sections-blockaddress-taken.ll b/llvm/test/CodeGen/X86/basic-block-sections-blockaddress-taken.ll new file mode 100644 index 0000000000000..aaae1cf2a942c --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-blockaddress-taken.ll @@ -0,0 +1,35 @@ +;; This test verifies that basic-block-sections works with address-taken basic blocks. 
+; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=all | FileCheck %s + +define void @foo(i1 zeroext %0) nounwind { +entry: + %1 = select i1 %0, i8* blockaddress(@foo, %bb1), i8* blockaddress(@foo, %bb2) ; [#uses=1] + indirectbr i8* %1, [label %bb1, label %bb2] + +; CHECK: .text +; CHECK-LABEL: foo: +; CHECK: movl $.Ltmp0, %eax +; CHECK-NEXT: movl $.Ltmp1, %ecx +; CHECK-NEXT: cmovneq %rax, %rcx +; CHECK-NEXT: jmpq *%rcx + +bb1: ; preds = %entry + %2 = call i32 @bar() + ret void +; CHECK: .section .text,"ax",@progbits,unique,1 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: foo.1 +; CHECK-NEXT: callq bar +; + +bb2: ; preds = %entry + %3 = call i32 @baz() + ret void +; CHECK: .section .text,"ax",@progbits,unique,2 +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: foo.2 +; CHECK-NEXT: callq baz +} + +declare i32 @bar() +declare i32 @baz() From ac2018da616c0080785c4bc16307d98c5ebbffe7 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 7 Oct 2020 13:53:14 -0700 Subject: [PATCH 266/321] [NFC][MLInliner] Getters should return by reference --- llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp index d247f574455b4..793339fcf2714 100644 --- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp +++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp @@ -298,9 +298,9 @@ class ModelUnderTrainingRunner final : public MLModelRunner { int64_t getFeature(int Index) const override; bool isValid() const { return !!Evaluator; } - const std::vector outputNames() const { return OutputNames; } + const std::vector &outputNames() const { return OutputNames; } - const std::vector outputSpecs() const { return OutputSpecs; } + const std::vector &outputSpecs() const { return OutputSpecs; } const Optional & lastEvaluationResult() const { From ef8b4e4fcd687f66ef0271d1257075f1f53dd34b Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 7 Oct 2020 14:02:10 -0700 Subject: [PATCH 267/321] Add validity assert on entry to CastInst::isNoopCast [NFC] This required some minor code reorganization to have a version of castIsValid which worked purely in terms of types. --- llvm/include/llvm/IR/InstrTypes.h | 7 +++++-- llvm/lib/IR/Instructions.cpp | 6 ++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index c86448ea72cb4..810e87de232b4 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -691,11 +691,14 @@ class CastInst : public UnaryInstruction { /// Return the destination type, as a convenience Type* getDestTy() const { return getType(); } - /// This method can be used to determine if a cast from S to DstTy using + /// This method can be used to determine if a cast from SrcTy to DstTy using /// Opcode op is valid or not. /// @returns true iff the proposed cast is valid. /// Determine if a cast is valid without creating one. 
- static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy); + static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy); + static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { + return castIsValid(op, S->getType(), DstTy); + } /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 445fad8bcbf41..b8663cdcbe839 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2654,6 +2654,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode, Type *SrcTy, Type *DestTy, const DataLayout &DL) { + assert(castIsValid(Opcode, SrcTy, DestTy) && "method precondition"); switch (Opcode) { default: llvm_unreachable("Invalid CastOp"); case Instruction::Trunc: @@ -3352,10 +3353,7 @@ CastInst::getCastOpcode( /// it in one place and to eliminate the redundant code for getting the sizes /// of the types involved. bool -CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { - // Check for type sanity on the arguments - Type *SrcTy = S->getType(); - +CastInst::castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy) { if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() || SrcTy->isAggregateType() || DstTy->isAggregateType()) return false; From 4065a0d98f675595cdf568bae9bbd872de51fcba Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Wed, 7 Oct 2020 14:13:46 -0700 Subject: [PATCH 268/321] [mlir] [sparse] Rename getSparseMatrix to getMatrix Rationale: More consistent with the other names. Also forward looking to reading in other kinds of matrices. Also fixes lint issue on hard-coded %llu. Reviewed By: penpornk Differential Revision: https://reviews.llvm.org/D89005 --- .../Sparse/CPU/matrix-market-example.mlir | 6 +++--- mlir/lib/ExecutionEngine/SparseUtils.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir b/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir index 31fb20fa11ccf..56c392e8133bf 100644 --- a/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir +++ b/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ // RUN: -convert-scf-to-std -convert-vector-to-scf \ // RUN: -convert-linalg-to-llvm -convert-vector-to-llvm | \ -// RUN: SPARSE_MATRIX0="%mlir_integration_test_dir/data/test.mtx" \ +// RUN: MATRIX0="%mlir_integration_test_dir/data/test.mtx" \ // RUN: mlir-cpu-runner \ // RUN: -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ @@ -11,7 +11,7 @@ module { func @openMatrix(!llvm.ptr, memref, memref, memref) -> () func @readMatrixItem(memref, memref, memref) -> () func @closeMatrix() -> () - func @getSparseMatrix(index) -> (!llvm.ptr) + func @getMatrix(index) -> (!llvm.ptr) func @entry() { %d0 = constant 0.0 : f64 @@ -29,7 +29,7 @@ module { // Read the header of a sparse matrix. This yields the // size (m x n) and number of nonzero elements (nnz). 
// - %file = call @getSparseMatrix(%c0) : (index) -> (!llvm.ptr) + %file = call @getMatrix(%c0) : (index) -> (!llvm.ptr) call @openMatrix(%file, %m, %n, %nnz) : (!llvm.ptr, memref, memref, memref) -> () diff --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseUtils.cpp index 6942a7b260c5e..253a3f46e4579 100644 --- a/mlir/lib/ExecutionEngine/SparseUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseUtils.cpp @@ -162,11 +162,11 @@ extern "C" void closeMatrix() { sparseFilename = nullptr; } -// Helper method to read sparse matrix filenames from the environment, defined -// with the naming convention ${SPARSE_MATRIX0}, ${SPARSE_MATRIX1}, etc. -extern "C" char *getSparseMatrix(uint64_t id) { +// Helper method to read matrix filenames from the environment, defined +// with the naming convention ${MATRIX0}, ${MATRIX1}, etc. +extern "C" char *getMatrix(uint64_t id) { char var[80]; - sprintf(var, "SPARSE_MATRIX%lu", id); + sprintf(var, "MATRIX%" PRIu64, id); char *env = getenv(var); return env; } From 40a24541029357543ae47664a68a44710e6fee84 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Tue, 6 Oct 2020 18:54:20 -0700 Subject: [PATCH 269/321] Add regular expressions to and DWARF Call Frame Information tests in case the architecture specific target is not compiled into LLVM. This should fix any build bots that avoid compiling some architectures into llvm after https://reviews.llvm.org/D88767. Differential Revision: https://reviews.llvm.org/D88940 --- llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test b/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test index e6efb2b2234cd..6c049af43efe7 100644 --- a/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test +++ b/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test @@ -6,8 +6,8 @@ ; FRAMES: 00000000 00000010 ffffffff CIE ; FRAMES: Version: 1 -; FRAMES: DW_CFA_def_cfa: ESP +4 -; FRAMES-NEXT: DW_CFA_offset: EIP -4 +; FRAMES: DW_CFA_def_cfa: {{reg4|ESP}} +4 +; FRAMES-NEXT: DW_CFA_offset: {{reg8|EIP}} -4 ; FRAMES-NEXT: DW_CFA_nop: ; FRAMES-NEXT: DW_CFA_nop: @@ -19,9 +19,9 @@ ; FRAMES: 00000028 00000014 00000000 FDE cie=00000000 pc=00000030...00000080 ; FRAMES: DW_CFA_advance_loc: 1 ; FRAMES-NEXT: DW_CFA_def_cfa_offset: +8 -; FRAMES-NEXT: DW_CFA_offset: EBP -8 +; FRAMES-NEXT: DW_CFA_offset: {{reg5|EBP}} -8 ; FRAMES-NEXT: DW_CFA_advance_loc: 2 -; FRAMES-NEXT: DW_CFA_def_cfa_register: EBP +; FRAMES-NEXT: DW_CFA_def_cfa_register: {{reg5|EBP}} ; FRAMES-NOT: CIE ; FRAMES-NOT: FDE From da48fe1732fcd87005ca22745f930b97ab93f365 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Fri, 2 Oct 2020 16:18:47 -0700 Subject: [PATCH 270/321] [NPM] Port strip nonlinetable debuginfo pass to the new pass manager Fixes a few tests in llvm/test/Transforms/Utils. 
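With the port in place, the pass can be scheduled programmatically through
PassBuilder as well as via `opt -passes=strip-nonlinetable-debuginfo`. A
minimal sketch of the programmatic path (illustrative only — the helper name
is made up, error handling is elided, and it assumes the
StripNonLineTableDebugInfo.h header added below):

  // Sketch: run the ported pass under the new pass manager.
  #include "llvm/IR/Module.h"
  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"

  static void stripToLineTablesOnly(llvm::Module &M) {
    llvm::PassBuilder PB;
    llvm::LoopAnalysisManager LAM;
    llvm::FunctionAnalysisManager FAM;
    llvm::CGSCCAnalysisManager CGAM;
    llvm::ModuleAnalysisManager MAM;
    // Wire up the analysis managers; required before running any pass.
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

    llvm::ModulePassManager MPM;
    MPM.addPass(llvm::StripNonLineTableDebugInfoPass());
    MPM.run(M, MAM); // downgrades debug info to line tables only
  }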
Differential Revision: https://reviews.llvm.org/D88762 --- llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Transforms/Utils.h | 2 +- .../Utils/StripNonLineTableDebugInfo.h | 26 +++++++++++++++++++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + .../Utils/StripNonLineTableDebugInfo.cpp | 23 +++++++++++----- llvm/lib/Transforms/Utils/Utils.cpp | 2 +- .../Util/strip-nonlinetable-debuginfo-cus.ll | 1 + 8 files changed, 48 insertions(+), 10 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index bbc506ceca190..907d299ae0f3a 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -416,7 +416,7 @@ void initializeStripDebugDeclarePass(PassRegistry&); void initializeStripDebugMachineModulePass(PassRegistry &); void initializeStripGCRelocatesPass(PassRegistry&); void initializeStripNonDebugSymbolsPass(PassRegistry&); -void initializeStripNonLineTableDebugInfoPass(PassRegistry&); +void initializeStripNonLineTableDebugLegacyPassPass(PassRegistry &); void initializeStripSymbolsPass(PassRegistry&); void initializeStructurizeCFGPass(PassRegistry&); void initializeTailCallElimPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/Utils.h b/llvm/include/llvm/Transforms/Utils.h index 75edefac1cbda..9162a86183db6 100644 --- a/llvm/include/llvm/Transforms/Utils.h +++ b/llvm/include/llvm/Transforms/Utils.h @@ -117,7 +117,7 @@ extern char &LoopSimplifyID; /// This function returns a new pass that downgrades the debug info in the /// module to line tables only. -ModulePass *createStripNonLineTableDebugInfoPass(); +ModulePass *createStripNonLineTableDebugLegacyPass(); //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h b/llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h new file mode 100644 index 0000000000000..20d0aabd29385 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h @@ -0,0 +1,26 @@ +//===- StripNonLineTableDebugInfo.h - -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_STRIPNONLINETABLEDEBUGINFO_H +#define LLVM_TRANSFORMS_UTILS_STRIPNONLINETABLEDEBUGINFO_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; + +class StripNonLineTableDebugInfoPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_STRIPNONLINETABLEDEBUGINFO_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 38fe128d7c1e6..713c7e9af9b5e 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -205,6 +205,7 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MetaRenamer.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" +#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 867a3ec634a9e..f9a208b92b19d 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -97,6 +97,7 @@ MODULE_PASS("strip-dead-debug-info", StripDeadDebugInfoPass()) MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) MODULE_PASS("strip-debug-declare", StripDebugDeclarePass()) MODULE_PASS("strip-nondebug", StripNonDebugSymbolsPass()) +MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass()) MODULE_PASS("verify", VerifierPass()) diff --git a/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp index 21cbbfb140b6d..10fda4df51ba3 100644 --- a/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp +++ b/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h" #include "llvm/IR/DebugInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -17,10 +18,11 @@ namespace { /// This pass strips all debug info that is not related line tables. /// The result will be the same as if the program where compiled with /// -gline-tables-only. 
-struct StripNonLineTableDebugInfo : public ModulePass { +struct StripNonLineTableDebugLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid - StripNonLineTableDebugInfo() : ModulePass(ID) { - initializeStripNonLineTableDebugInfoPass(*PassRegistry::getPassRegistry()); + StripNonLineTableDebugLegacyPass() : ModulePass(ID) { + initializeStripNonLineTableDebugLegacyPassPass( + *PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -33,10 +35,17 @@ struct StripNonLineTableDebugInfo : public ModulePass { }; } -char StripNonLineTableDebugInfo::ID = 0; -INITIALIZE_PASS(StripNonLineTableDebugInfo, "strip-nonlinetable-debuginfo", +char StripNonLineTableDebugLegacyPass::ID = 0; +INITIALIZE_PASS(StripNonLineTableDebugLegacyPass, + "strip-nonlinetable-debuginfo", "Strip all debug info except linetables", false, false) -ModulePass *llvm::createStripNonLineTableDebugInfoPass() { - return new StripNonLineTableDebugInfo(); +ModulePass *llvm::createStripNonLineTableDebugLegacyPass() { + return new StripNonLineTableDebugLegacyPass(); +} + +PreservedAnalyses +StripNonLineTableDebugInfoPass::run(Module &M, ModuleAnalysisManager &AM) { + llvm::stripNonLineTableDebugInfo(M); + return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp index 1638635440a95..a3bed38bafd01 100644 --- a/llvm/lib/Transforms/Utils/Utils.cpp +++ b/llvm/lib/Transforms/Utils/Utils.cpp @@ -37,7 +37,7 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeLowerSwitchLegacyPassPass(Registry); initializeNameAnonGlobalLegacyPassPass(Registry); initializePromoteLegacyPassPass(Registry); - initializeStripNonLineTableDebugInfoPass(Registry); + initializeStripNonLineTableDebugLegacyPassPass(Registry); initializeUnifyFunctionExitNodesPass(Registry); initializeMetaRenamerPass(Registry); initializeStripGCRelocatesPass(Registry); diff --git a/llvm/test/Transforms/Util/strip-nonlinetable-debuginfo-cus.ll b/llvm/test/Transforms/Util/strip-nonlinetable-debuginfo-cus.ll index f7ffdf9cf9a08..71145dded1bdf 100644 --- a/llvm/test/Transforms/Util/strip-nonlinetable-debuginfo-cus.ll +++ b/llvm/test/Transforms/Util/strip-nonlinetable-debuginfo-cus.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -strip-nonlinetable-debuginfo %s -o - | FileCheck %s +; RUN: opt -S -passes=strip-nonlinetable-debuginfo %s -o - | FileCheck %s !llvm.dbg.cu = !{!2, !6} !llvm.gcov = !{!3} !llvm.module.flags = !{!7} From 940d7aaea958fb343c7a72de89157aed2dc548de Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Fri, 2 Oct 2020 16:31:57 -0700 Subject: [PATCH 271/321] Port StripGCRelocates pass to NPM Fixes one test under NPM Differential Revision: https://reviews.llvm.org/D88766 --- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Transforms/Utils/StripGCRelocates.h | 25 ++++++++++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + .../lib/Transforms/Utils/StripGCRelocates.cpp | 46 ++++++++++++------- llvm/lib/Transforms/Utils/Utils.cpp | 2 +- .../Transforms/Util/strip-gc-relocates.ll | 1 + 7 files changed, 59 insertions(+), 19 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Utils/StripGCRelocates.h diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 907d299ae0f3a..ef1004b98d347 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -414,7 +414,7 @@ void initializeStripDeadDebugInfoPass(PassRegistry&); void 
initializeStripDeadPrototypesLegacyPassPass(PassRegistry&); void initializeStripDebugDeclarePass(PassRegistry&); void initializeStripDebugMachineModulePass(PassRegistry &); -void initializeStripGCRelocatesPass(PassRegistry&); +void initializeStripGCRelocatesLegacyPass(PassRegistry &); void initializeStripNonDebugSymbolsPass(PassRegistry&); void initializeStripNonLineTableDebugLegacyPassPass(PassRegistry &); void initializeStripSymbolsPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/Utils/StripGCRelocates.h b/llvm/include/llvm/Transforms/Utils/StripGCRelocates.h new file mode 100644 index 0000000000000..13e6d8ac26a7f --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/StripGCRelocates.h @@ -0,0 +1,25 @@ +//===- StripGCRelocates.h - -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_STRIPGCRELOCATES_H +#define LLVM_TRANSFORMS_UTILS_STRIPGCRELOCATES_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; + +class StripGCRelocates : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_STRIPGCRELOCATES_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 713c7e9af9b5e..92724ed22d02d 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -205,6 +205,7 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MetaRenamer.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" +#include "llvm/Transforms/Utils/StripGCRelocates.h" #include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index f9a208b92b19d..79b942a21b071 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -279,6 +279,7 @@ FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass()) FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass()) FUNCTION_PASS("sroa", SROA()) +FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("vector-combine", VectorCombinePass()) FUNCTION_PASS("verify", VerifierPass()) diff --git a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp index b559811d120bc..1fa574f04c370 100644 --- a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp @@ -13,6 +13,7 @@ // present. 
//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/StripGCRelocates.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -24,22 +25,7 @@ using namespace llvm; -namespace { -struct StripGCRelocates : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - StripGCRelocates() : FunctionPass(ID) { - initializeStripGCRelocatesPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &Info) const override {} - - bool runOnFunction(Function &F) override; - -}; -char StripGCRelocates::ID = 0; -} - -bool StripGCRelocates::runOnFunction(Function &F) { +static bool stripGCRelocates(Function &F) { // Nothing to do for declarations. if (F.isDeclaration()) return false; @@ -71,6 +57,32 @@ bool StripGCRelocates::runOnFunction(Function &F) { return !GCRelocates.empty(); } -INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates", +PreservedAnalyses StripGCRelocates::run(Function &F, + FunctionAnalysisManager &AM) { + if (!stripGCRelocates(F)) + return PreservedAnalyses::all(); + + // Removing gc.relocate preserves the CFG, but most other analysis probably + // need to re-run. + PreservedAnalyses PA; + PA.preserveSet(); + return PA; +} + +namespace { +struct StripGCRelocatesLegacy : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + StripGCRelocatesLegacy() : FunctionPass(ID) { + initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &Info) const override {} + + bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); } +}; +char StripGCRelocatesLegacy::ID = 0; +} // namespace + +INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates", "Strip gc.relocates inserted through RewriteStatepointsForGC", true, false) diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp index a3bed38bafd01..5f975965f1a50 100644 --- a/llvm/lib/Transforms/Utils/Utils.cpp +++ b/llvm/lib/Transforms/Utils/Utils.cpp @@ -40,7 +40,7 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeStripNonLineTableDebugLegacyPassPass(Registry); initializeUnifyFunctionExitNodesPass(Registry); initializeMetaRenamerPass(Registry); - initializeStripGCRelocatesPass(Registry); + initializeStripGCRelocatesLegacyPass(Registry); initializePredicateInfoPrinterLegacyPassPass(Registry); initializeInjectTLIMappingsLegacyPass(Registry); initializeFixIrreduciblePass(Registry); diff --git a/llvm/test/Transforms/Util/strip-gc-relocates.ll b/llvm/test/Transforms/Util/strip-gc-relocates.ll index 77b8ffd079e92..9aa18ff7bf875 100644 --- a/llvm/test/Transforms/Util/strip-gc-relocates.ll +++ b/llvm/test/Transforms/Util/strip-gc-relocates.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -strip-gc-relocates -instcombine < %s | FileCheck %s +; RUN: opt -S -passes=strip-gc-relocates,instcombine < %s | FileCheck %s ; test utility/debugging pass which removes gc.relocates, inserted by -rewrite-statepoints-for-gc declare void @use_obj32(i32 addrspace(1)*) "gc-leaf-function" From 88afb6e86774c7d2ffe9385714e7810ea50636d2 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Thu, 1 Oct 2020 12:08:04 -0700 Subject: [PATCH 272/321] [flang] Semantic checks for bad usage of whole assumed-size arrays The semantics pass currently checks for several constraints that apply to the use of whole assumed-size arrays in various contexts, but C1002 wasn't really implemented. 
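For background, the centralized check relies on flang's RAII restorer idiom
(common::ScopedSet / common::Restorer from flang/Common/restorer.h) to
re-allow whole assumed-size arrays only for the lifetime of a scope. A
stand-alone sketch of that idiom, simplified from the real templates:

  // Simplified sketch of the ScopedSet/Restorer idiom; the real
  // definitions live in flang/include/flang/Common/restorer.h.
  #include <iostream>
  #include <utility>

  template <typename A> class Restorer {
  public:
    Restorer(A &p, A original) : p_{p}, original_{std::move(original)} {}
    ~Restorer() { p_ = std::move(original_); } // restore on scope exit
  private:
    A &p_;
    A original_;
  };

  template <typename A> Restorer<A> ScopedSet(A &to, A value) {
    A original{std::move(to)};
    to = std::move(value);
    return Restorer<A>{to, std::move(original)};
  }

  int main() {
    bool ok{false};
    {
      auto restorer{ScopedSet(ok, true)}; // flag is true only in this scope
      std::cout << ok << '\n';            // prints 1
    }
    std::cout << ok << '\n';              // prints 0: flag restored
  }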
This patch implements C1002 by disallowing the use of whole assumed-size arrays in expressions and variables unless specifically allowed by the context. This centralizes the error reporting, which has been improved with a link to the array's declaration. Differential revision: https://reviews.llvm.org/D88691 --- flang/include/flang/Semantics/expression.h | 8 +++ flang/lib/Semantics/assignment.cpp | 5 -- flang/lib/Semantics/check-io.cpp | 8 --- flang/lib/Semantics/expression.cpp | 61 ++++++++++++++-------- flang/test/Semantics/assign04.f90 | 4 +- flang/test/Semantics/io03.f90 | 2 +- 6 files changed, 51 insertions(+), 37 deletions(-) diff --git a/flang/include/flang/Semantics/expression.h b/flang/include/flang/Semantics/expression.h index 7daeeba507f65..75cf4fe53664d 100644 --- a/flang/include/flang/Semantics/expression.h +++ b/flang/include/flang/Semantics/expression.h @@ -12,6 +12,7 @@ #include "semantics.h" #include "flang/Common/Fortran.h" #include "flang/Common/indirection.h" +#include "flang/Common/restorer.h" #include "flang/Evaluate/characteristics.h" #include "flang/Evaluate/check-expression.h" #include "flang/Evaluate/expression.h" @@ -139,6 +140,12 @@ class ExpressionAnalyzer { // its INTEGER kind type parameter. std::optional IsImpliedDo(parser::CharBlock) const; + // Allows a whole assumed-size array to appear for the lifetime of + // the returned value. + common::Restorer AllowWholeAssumedSizeArray() { + return common::ScopedSet(isWholeAssumedSizeArrayOk_, true); + } + Expr AnalyzeKindSelector(common::TypeCategory category, const std::optional &); @@ -372,6 +379,7 @@ class ExpressionAnalyzer { FoldingContext &foldingContext_{context_.foldingContext()}; std::map impliedDos_; // values are INTEGER kinds bool fatalErrors_{false}; + bool isWholeAssumedSizeArrayOk_{false}; friend class ArgumentAnalyzer; }; diff --git a/flang/lib/Semantics/assignment.cpp b/flang/lib/Semantics/assignment.cpp index 0b765c72fdd7c..090aae0af8cb7 100644 --- a/flang/lib/Semantics/assignment.cpp +++ b/flang/lib/Semantics/assignment.cpp @@ -66,11 +66,6 @@ void AssignmentContext::Analyze(const parser::AssignmentStmt &stmt) { const SomeExpr &rhs{assignment->rhs}; auto lhsLoc{std::get(stmt.t).GetSource()}; auto rhsLoc{std::get(stmt.t).source}; - auto shape{evaluate::GetShape(foldingContext(), lhs)}; - if (shape && !shape->empty() && !shape->back().has_value()) { // C1014 - Say(lhsLoc, - "Left-hand side of assignment may not be a whole assumed-size array"_err_en_US); - } if (CheckForPureContext(lhs, rhs, rhsLoc, false)) { const Scope &scope{context_.FindScope(lhsLoc)}; if (auto whyNot{WhyNotModifiable(lhsLoc, lhs, scope, true)}) { diff --git a/flang/lib/Semantics/check-io.cpp b/flang/lib/Semantics/check-io.cpp index 26702f6c48bf9..9095951389f26 100644 --- a/flang/lib/Semantics/check-io.cpp +++ b/flang/lib/Semantics/check-io.cpp @@ -298,14 +298,6 @@ void IoChecker::Enter(const parser::InputItem &spec) { return; } CheckForDefinableVariable(*var, "Input"); - const auto &name{GetLastName(*var)}; - const auto *expr{GetExpr(*var)}; - if (name.symbol && IsAssumedSizeArray(*name.symbol) && expr && - !evaluate::IsArrayElement(*GetExpr(*var))) { - context_.Say(name.source, - "Whole assumed size array '%s' may not be an input item"_err_en_US, - name.source); // C1231 - } } void IoChecker::Enter(const parser::InquireSpec &spec) { diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 5a2a7df9fb98d..661024f6990de 100644 --- a/flang/lib/Semantics/expression.cpp +++ 
b/flang/lib/Semantics/expression.cpp @@ -151,6 +151,7 @@ class ArgumentAnalyzer { std::vector, parser::MessageFixedText &&); MaybeExpr TryBoundOp(const Symbol &, int passIndex); std::optional AnalyzeExpr(const parser::Expr &); + MaybeExpr AnalyzeExprOrWholeAssumedSizeArray(const parser::Expr &); bool AreConformable() const; const Symbol *FindBoundOp(parser::CharBlock, int passIndex); void AddAssignmentConversion( @@ -673,6 +674,14 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::Name &n) { n.symbol->attrs().reset(semantics::Attr::VOLATILE); } } + if (!isWholeAssumedSizeArrayOk_ && + semantics::IsAssumedSizeArray(*n.symbol)) { // C1002, C1014, C1231 + AttachDeclaration( + SayAt(n, + "Whole assumed-size array '%s' may not appear here without subscripts"_err_en_US, + n.source), + *n.symbol); + } return Designate(DataRef{*n.symbol}); } } @@ -885,7 +894,12 @@ std::vector ExpressionAnalyzer::AnalyzeSectionSubscripts( } MaybeExpr ExpressionAnalyzer::Analyze(const parser::ArrayElement &ae) { - if (MaybeExpr baseExpr{Analyze(ae.base)}) { + MaybeExpr baseExpr; + { + auto restorer{AllowWholeAssumedSizeArray()}; + baseExpr = Analyze(ae.base); + } + if (baseExpr) { if (ae.subscripts.empty()) { // will be converted to function call later or error reported return std::nullopt; @@ -2713,9 +2727,6 @@ void ArgumentAnalyzer::Analyze(const parser::Variable &x) { void ArgumentAnalyzer::Analyze( const parser::ActualArgSpec &arg, bool isSubroutine) { - // TODO: C1002: Allow a whole assumed-size array to appear if the dummy - // argument would accept it. Handle by special-casing the context - // ActualArg -> Variable -> Designator. // TODO: Actual arguments that are procedures and procedure pointers need to // be detected and represented (they're not expressions). // TODO: C1534: Don't allow a "restricted" specific intrinsic to be passed. @@ -2983,6 +2994,7 @@ void ArgumentAnalyzer::Dump(llvm::raw_ostream &os) { } } } + std::optional ArgumentAnalyzer::AnalyzeExpr( const parser::Expr &expr) { source_.ExtendToCover(expr.source); @@ -2990,26 +3002,33 @@ std::optional ArgumentAnalyzer::AnalyzeExpr( expr.typedExpr.Reset(new GenericExprWrapper{}, GenericExprWrapper::Deleter); if (isProcedureCall_) { return ActualArgument{ActualArgument::AssumedType{*assumedTypeDummy}}; - } else { - context_.SayAt(expr.source, - "TYPE(*) dummy argument may only be used as an actual argument"_err_en_US); - return std::nullopt; } - } else if (MaybeExpr argExpr{context_.Analyze(expr)}) { - if (!isProcedureCall_ && IsProcedure(*argExpr)) { - if (IsFunction(*argExpr)) { - context_.SayAt( - expr.source, "Function call must have argument list"_err_en_US); - } else { - context_.SayAt( - expr.source, "Subroutine name is not allowed here"_err_en_US); - } - return std::nullopt; + context_.SayAt(expr.source, + "TYPE(*) dummy argument may only be used as an actual argument"_err_en_US); + } else if (MaybeExpr argExpr{AnalyzeExprOrWholeAssumedSizeArray(expr)}) { + if (isProcedureCall_ || !IsProcedure(*argExpr)) { + return ActualArgument{context_.Fold(std::move(*argExpr))}; + } + context_.SayAt(expr.source, + IsFunction(*argExpr) ? 
"Function call must have argument list"_err_en_US + : "Subroutine name is not allowed here"_err_en_US); + } + return std::nullopt; +} + +MaybeExpr ArgumentAnalyzer::AnalyzeExprOrWholeAssumedSizeArray( + const parser::Expr &expr) { + // If an expression's parse tree is a whole assumed-size array: + // Expr -> Designator -> DataRef -> Name + // treat it as a special case for argument passing and bypass + // the C1002/C1014 constraint checking in expression semantics. + if (const auto *name{parser::Unwrap(expr)}) { + if (name->symbol && semantics::IsAssumedSizeArray(*name->symbol)) { + auto restorer{context_.AllowWholeAssumedSizeArray()}; + return context_.Analyze(expr); } - return ActualArgument{context_.Fold(std::move(*argExpr))}; - } else { - return std::nullopt; } + return context_.Analyze(expr); } bool ArgumentAnalyzer::AreConformable() const { diff --git a/flang/test/Semantics/assign04.f90 b/flang/test/Semantics/assign04.f90 index fb47f6dceab96..1aa87d34af983 100644 --- a/flang/test/Semantics/assign04.f90 +++ b/flang/test/Semantics/assign04.f90 @@ -94,7 +94,7 @@ subroutine s6(x) x(:3) = [1, 2, 3] !ERROR: Assumed-size array 'x' must have explicit final subscript upper bound value x(:) = [1, 2, 3] - !ERROR: Left-hand side of assignment may not be a whole assumed-size array + !ERROR: Whole assumed-size array 'x' may not appear here without subscripts x = [1, 2, 3] end @@ -106,7 +106,7 @@ module m7 subroutine s7(x) type(t) :: x(*) x(:3)%i = [1, 2, 3] - !ERROR: Left-hand side of assignment may not be a whole assumed-size array + !ERROR: Whole assumed-size array 'x' may not appear here without subscripts x%i = [1, 2, 3] end end diff --git a/flang/test/Semantics/io03.f90 b/flang/test/Semantics/io03.f90 index 5eb3420d1aea1..e93646bf37bad 100644 --- a/flang/test/Semantics/io03.f90 +++ b/flang/test/Semantics/io03.f90 @@ -178,6 +178,6 @@ subroutine s(aa, n) !ERROR: Input variable 'n' must be definable read(*, *) n - !ERROR: Whole assumed size array 'aa' may not be an input item + !ERROR: Whole assumed-size array 'aa' may not appear here without subscripts read(*, *) aa end From 297655c123b3ae612f681e936f97818f2a899de6 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 6 Oct 2020 14:38:41 -0700 Subject: [PATCH 273/321] [NFC][regalloc] Use MCRegister instead of unsigned in InterferenceCache Also changed users of APIs. 
Differential Revision: https://reviews.llvm.org/D88930 --- llvm/lib/CodeGen/InterferenceCache.cpp | 6 +++--- llvm/lib/CodeGen/InterferenceCache.h | 18 ++++++++---------- llvm/lib/CodeGen/RegAllocGreedy.cpp | 4 ++-- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/llvm/lib/CodeGen/InterferenceCache.cpp b/llvm/lib/CodeGen/InterferenceCache.cpp index 617db0450d02e..a56485cdbc674 100644 --- a/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/llvm/lib/CodeGen/InterferenceCache.cpp @@ -60,8 +60,8 @@ void InterferenceCache::init(MachineFunction *mf, Entries[i].clear(mf, indexes, lis); } -InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { - unsigned E = PhysRegEntries[PhysReg]; +InterferenceCache::Entry *InterferenceCache::get(MCRegister PhysReg) { + unsigned char E = PhysRegEntries[PhysReg.id()]; if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) { if (!Entries[E].valid(LIUArray, TRI)) Entries[E].revalidate(LIUArray, TRI); @@ -97,7 +97,7 @@ void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray, RegUnits[i].VirtTag = LIUArray[*Units].getTag(); } -void InterferenceCache::Entry::reset(unsigned physReg, +void InterferenceCache::Entry::reset(MCRegister physReg, LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI, const MachineFunction *MF) { diff --git a/llvm/lib/CodeGen/InterferenceCache.h b/llvm/lib/CodeGen/InterferenceCache.h index 9019e9f61fa0b..ace1691c1363d 100644 --- a/llvm/lib/CodeGen/InterferenceCache.h +++ b/llvm/lib/CodeGen/InterferenceCache.h @@ -44,7 +44,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { /// of PhysReg in all basic blocks. class Entry { /// PhysReg - The register currently represented. - unsigned PhysReg = 0; + MCRegister PhysReg = 0; /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions /// change. @@ -102,13 +102,13 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) { assert(!hasRefs() && "Cannot clear cache entry with references"); - PhysReg = 0; + PhysReg = MCRegister::NoRegister; MF = mf; Indexes = indexes; LIS = lis; } - unsigned getPhysReg() const { return PhysReg; } + MCRegister getPhysReg() const { return PhysReg; } void addRef(int Delta) { RefCount += Delta; } @@ -120,10 +120,8 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI); /// reset - Initialize entry to represent physReg's aliases. - void reset(unsigned physReg, - LiveIntervalUnion *LIUArray, - const TargetRegisterInfo *TRI, - const MachineFunction *MF); + void reset(MCRegister physReg, LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI, const MachineFunction *MF); /// get - Return an up to date BlockInterference. BlockInterference *get(unsigned MBBNum) { @@ -154,7 +152,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { Entry Entries[CacheEntries]; // get - Get a valid entry for PhysReg. - Entry *get(unsigned PhysReg); + Entry *get(MCRegister PhysReg); public: InterferenceCache() = default; @@ -207,11 +205,11 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { ~Cursor() { setEntry(nullptr); } /// setPhysReg - Point this cursor to PhysReg's interference. - void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) { + void setPhysReg(InterferenceCache &Cache, MCRegister PhysReg) { // Release reference before getting a new one. That guarantees we can // actually have CacheEntries live cursors. 
      setEntry(nullptr);
-      if (PhysReg)
+      if (PhysReg.isValid())
        setEntry(Cache.get(PhysReg));
    }

diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 5b0f9384c04ce..e634eb4118e44 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -361,7 +361,7 @@ class RAGreedy : public MachineFunctionPass,
     BitVector LiveBundles;
     SmallVector ActiveBlocks;

-    void reset(InterferenceCache &Cache, unsigned Reg) {
+    void reset(InterferenceCache &Cache, MCRegister Reg) {
       PhysReg = Reg;
       IntvIdx = 0;
       Intf.setPhysReg(Cache, Reg);
@@ -1372,7 +1372,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
     return false;

   // Compact regions don't correspond to any physreg.
-  Cand.reset(IntfCache, 0);
+  Cand.reset(IntfCache, MCRegister::NoRegister);

   LLVM_DEBUG(dbgs() << "Compact region bundles");

From bd5fe7b010ea0d6c4b1e5e3740085f5eabf7def6 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu
Date: Mon, 5 Oct 2020 20:24:33 -0700
Subject: [PATCH 274/321] [M680x0] Add google/benchmark's CycleTimer support
 for M68K

This is a cherrypick of the upstream fix commit ffe1342 onto
`llvm/utils/benchmark` and `libcxx/utils/google-benchmark`.

This adds a CycleTimer implementation for M680x0, which simply uses
`gettimeofday`, the same as MIPS.

Differential Revision: https://reviews.llvm.org/D88868
---
 libcxx/utils/google-benchmark/AUTHORS          | 1 +
 libcxx/utils/google-benchmark/CONTRIBUTORS     | 1 +
 libcxx/utils/google-benchmark/README.LLVM      | 2 ++
 libcxx/utils/google-benchmark/src/cycleclock.h | 2 +-
 llvm/utils/benchmark/AUTHORS                   | 1 +
 llvm/utils/benchmark/CONTRIBUTORS              | 1 +
 llvm/utils/benchmark/README.LLVM               | 2 ++
 llvm/utils/benchmark/src/cycleclock.h          | 2 +-
 8 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/libcxx/utils/google-benchmark/AUTHORS b/libcxx/utils/google-benchmark/AUTHORS
index 09e2e0551adf6..3593870661ec4 100644
--- a/libcxx/utils/google-benchmark/AUTHORS
+++ b/libcxx/utils/google-benchmark/AUTHORS
@@ -46,3 +46,4 @@ Stripe, Inc.
 Yixuan Qiu
 Yusuke Suzuki
 Zbigniew Skowron
+Min-Yih Hsu
diff --git a/libcxx/utils/google-benchmark/CONTRIBUTORS b/libcxx/utils/google-benchmark/CONTRIBUTORS
index ee74ff886c0c9..9e7a39f25881c 100644
--- a/libcxx/utils/google-benchmark/CONTRIBUTORS
+++ b/libcxx/utils/google-benchmark/CONTRIBUTORS
@@ -66,3 +66,4 @@ Tom Madams
 Yixuan Qiu
 Yusuke Suzuki
 Zbigniew Skowron
+Min-Yih Hsu
diff --git a/libcxx/utils/google-benchmark/README.LLVM b/libcxx/utils/google-benchmark/README.LLVM
index ea92eee202ce8..7a0c02c420d43 100644
--- a/libcxx/utils/google-benchmark/README.LLVM
+++ b/libcxx/utils/google-benchmark/README.LLVM
@@ -22,3 +22,5 @@ Changes:
   to fix timestamp-related inline asm issues and 32-bit RISC-V build failures.
   The second cherrypicked commit fixes formatting issues introduced by the
   preceding change.
+* https://github.com/google/benchmark/commit/ffe1342eb2faa7d2e7c35b4db2ccf99fab81ec20
+  is applied to add the CycleTimer implementation for M680x0
diff --git a/libcxx/utils/google-benchmark/src/cycleclock.h b/libcxx/utils/google-benchmark/src/cycleclock.h
index 179c67cd614a6..93d579a739c1c 100644
--- a/libcxx/utils/google-benchmark/src/cycleclock.h
+++ b/libcxx/utils/google-benchmark/src/cycleclock.h
@@ -161,7 +161,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   struct timeval tv;
   gettimeofday(&tv, nullptr);
   return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec;
-#elif defined(__mips__)
+#elif defined(__mips__) || defined(__m68k__)
   // mips apparently only allows rdtsc for superusers, so we fall
   // back to gettimeofday.
It's possible clock_gettime would be better.
  struct timeval tv;
diff --git a/llvm/utils/benchmark/AUTHORS b/llvm/utils/benchmark/AUTHORS
index 381a8f486afb5..052a383f77cdf 100644
--- a/llvm/utils/benchmark/AUTHORS
+++ b/llvm/utils/benchmark/AUTHORS
@@ -44,3 +44,4 @@ Stripe, Inc.
 Yixuan Qiu
 Yusuke Suzuki
 Zbigniew Skowron
+Min-Yih Hsu
diff --git a/llvm/utils/benchmark/CONTRIBUTORS b/llvm/utils/benchmark/CONTRIBUTORS
index 1cf04db17e4ed..53e7b6f9f8b42 100644
--- a/llvm/utils/benchmark/CONTRIBUTORS
+++ b/llvm/utils/benchmark/CONTRIBUTORS
@@ -63,3 +63,4 @@ Tom Madams
 Yixuan Qiu
 Yusuke Suzuki
 Zbigniew Skowron
+Min-Yih Hsu
diff --git a/llvm/utils/benchmark/README.LLVM b/llvm/utils/benchmark/README.LLVM
index b370925b95434..afd70a3dd9141 100644
--- a/llvm/utils/benchmark/README.LLVM
+++ b/llvm/utils/benchmark/README.LLVM
@@ -33,3 +33,5 @@ Changes:
   are applied on top of the previous cherrypick to fix timestamp-related
  inline asm issues and 32-bit RISC-V build failures. The second cherrypicked
  commit fixes formatting issues introduced by the preceding change.
+* https://github.com/google/benchmark/commit/ffe1342eb2faa7d2e7c35b4db2ccf99fab81ec20
+  is applied to add the CycleTimer implementation for M680x0
diff --git a/llvm/utils/benchmark/src/cycleclock.h b/llvm/utils/benchmark/src/cycleclock.h
index 1b0f09359c9b4..88b7805faafa2 100644
--- a/llvm/utils/benchmark/src/cycleclock.h
+++ b/llvm/utils/benchmark/src/cycleclock.h
@@ -161,7 +161,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   struct timeval tv;
   gettimeofday(&tv, nullptr);
   return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec;
-#elif defined(__mips__)
+#elif defined(__mips__) || defined(__m68k__)
   // mips apparently only allows rdtsc for superusers, so we fall
   // back to gettimeofday. It's possible clock_gettime would be better.
   struct timeval tv;

From 81b11c91070f3a969b64b2c2e6011b02450fa75f Mon Sep 17 00:00:00 2001
From: Jim Ingham
Date: Wed, 7 Oct 2020 15:00:08 -0700
Subject: [PATCH 275/321] Fix a macOS build break caused by 3dfb94986170.

---
 .../MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.mm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/Platform/MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.mm b/lldb/source/Plugins/Platform/MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.mm
index cfd44f9ae5ce4..92bf716599b08 100644
--- a/lldb/source/Plugins/Platform/MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.mm
+++ b/lldb/source/Plugins/Platform/MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.mm
@@ -415,8 +415,9 @@ static Status HandleFileAction(ProcessLaunchInfo &launch_info,
       secondary_fd = launch_info.GetPTY().OpenSecondary(O_RDWR, nullptr, 0);
       if (secondary_fd == PseudoTerminal::invalid_fd) {
+        std::string secondary_path = secondary_spec.GetPath();
         error.SetErrorStringWithFormat(
-            "unable to open secondary pty '%s'", secondary_path);
+            "unable to open secondary pty '%s'", secondary_path.c_str());
         return error; // Failure
       }
       [options setValue:[NSNumber numberWithInteger:secondary_fd]

From 8da0df3d6dcc0dd42740be60b0da4ec201190904 Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Wed, 7 Oct 2020 18:03:09 -0400
Subject: [PATCH 276/321] [libc++] Remove unused includes of Availability.h

Since ebaf1d5e2b, the macros defined in <Availability.h> are not used
anymore.
--- libcxx/include/iterator | 3 --- libcxx/include/locale | 4 ---- 2 files changed, 7 deletions(-) diff --git a/libcxx/include/iterator b/libcxx/include/iterator index e8e379624ac09..99f9b8173dbfe 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -421,9 +421,6 @@ template constexpr const E* data(initializer_list il) noexcept; #include #include #include -#ifdef __APPLE__ -#include -#endif #include <__debug> diff --git a/libcxx/include/locale b/libcxx/include/locale index 60aab50d17644..2723c0cfea666 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -197,10 +197,6 @@ template class messages_byname; #include #endif -#ifdef __APPLE__ -#include -#endif - #ifdef _LIBCPP_LOCALE__L_EXTENSIONS #include <__bsd_locale_defaults.h> #else From d56fdc8e95df3431b67d33fe4b03a08406897339 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 2 Oct 2020 12:39:05 -0700 Subject: [PATCH 277/321] [flang][msvc] Avoid dependence on long double MSVC does not support a distinct 80-bit extended precision "long double" type. Rework the I/O runtime to avoid using native C/C++ type names. Centralize the mappings between the KIND= type parameters of REAL and their binary precisions in the common real.h header file, and use KIND type parameter values rather than binary precisions for clarity where appropriate. This patch, if successful, should obviate the need for Differential review D88511. (This patch anticipates a successful review of D88688, which fixes the function that maps each kind of real to its maximum number of significant decimal digits.) Differential revision: https://reviews.llvm.org/D88752 --- flang/include/flang/Common/real.h | 82 ++++++++++++++----- .../flang/Decimal/binary-floating-point.h | 1 + flang/include/flang/Evaluate/type.h | 51 ++---------- flang/runtime/descriptor-io.h | 56 +++++++------ flang/runtime/edit-input.cpp | 19 +++-- flang/runtime/edit-input.h | 15 ++-- flang/runtime/edit-output.cpp | 11 +-- flang/runtime/edit-output.h | 20 ++--- 8 files changed, 134 insertions(+), 121 deletions(-) diff --git a/flang/include/flang/Common/real.h b/flang/include/flang/Common/real.h index 6ff9e441f2ce0..036f665d3da61 100644 --- a/flang/include/flang/Common/real.h +++ b/flang/include/flang/Common/real.h @@ -20,20 +20,20 @@ namespace Fortran::common { // Total representation size in bits for each type static constexpr int BitsForBinaryPrecision(int binaryPrecision) { switch (binaryPrecision) { - case 8: - return 16; // IEEE single (truncated): 1+8+7 - case 11: - return 16; // IEEE half precision: 1+5+10 - case 24: - return 32; // IEEE single precision: 1+8+23 - case 53: - return 64; // IEEE double precision: 1+11+52 - case 64: - return 80; // x87 extended precision: 1+15+64 - case 106: - return 128; // "double-double": 2*(1+11+52) - case 113: - return 128; // IEEE quad precision: 1+15+112 + case 8: // IEEE single (truncated): 1+8+7 with implicit bit + return 16; + case 11: // IEEE half precision: 1+5+10 with implicit bit + return 16; + case 24: // IEEE single precision: 1+8+23 with implicit bit + return 32; + case 53: // IEEE double precision: 1+11+52 with implicit bit + return 64; + case 64: // x87 extended precision: 1+15+64, no implicit bit + return 80; + case 106: // "double-double": 2*(1+11+52 with implicit bit) + return 128; + case 113: // IEEE quad precision: 1+15+112 with implicit bit + return 128; default: return -1; } @@ -44,25 +44,65 @@ static constexpr int BitsForBinaryPrecision(int binaryPrecision) { // with the minimum exponent (biased to 1) and all 
fractional bits set.
 static constexpr int MaxDecimalConversionDigits(int binaryPrecision) {
   switch (binaryPrecision) {
-  case 8:
+  case 8: // IEEE single (truncated): 1+8+7 with implicit bit
     return 96;
-  case 11:
+  case 11: // IEEE half precision: 1+5+10 with implicit bit
     return 21;
-  case 24:
+  case 24: // IEEE single precision: 1+8+23 with implicit bit
     return 112;
-  case 53:
+  case 53: // IEEE double precision: 1+11+52 with implicit bit
     return 767;
-  case 64:
+  case 64: // x87 extended precision: 1+15+64, no implicit bit
     return 11514;
-  case 106:
+  case 106: // "double-double": 2*(1+11+52 with implicit bit)
     return 2 * 767;
-  case 113:
+  case 113: // IEEE quad precision: 1+15+112 with implicit bit
     return 11563;
   default:
     return -1;
   }
 }

+static constexpr int RealKindForPrecision(int binaryPrecision) {
+  switch (binaryPrecision) {
+  case 8: // IEEE single (truncated): 1+8+7 with implicit bit
+    return 3;
+  case 11: // IEEE half precision: 1+5+10 with implicit bit
+    return 2;
+  case 24: // IEEE single precision: 1+8+23 with implicit bit
+    return 4;
+  case 53: // IEEE double precision: 1+11+52 with implicit bit
+    return 8;
+  case 64: // x87 extended precision: 1+15+64, no implicit bit
+    return 10;
+  // TODO: case 106: return kind for double/double
+  case 113: // IEEE quad precision: 1+15+112 with implicit bit
+    return 16;
+  default:
+    return -1;
+  }
+}
+
+static constexpr int PrecisionOfRealKind(int kind) {
+  switch (kind) {
+  case 2: // IEEE half precision: 1+5+10 with implicit bit
+    return 11;
+  case 3: // IEEE single (truncated): 1+8+7 with implicit bit
+    return 8;
+  case 4: // IEEE single precision: 1+8+23 with implicit bit
+    return 24;
+  case 8: // IEEE double precision: 1+11+52 with implicit bit
+    return 53;
+  case 10: // x87 extended precision: 1+15+64, no implicit bit
+    return 64;
+  // TODO: case kind for double/double: return 106;
+  case 16: // IEEE quad precision: 1+15+112 with implicit bit
+    return 113;
+  default:
+    return -1;
+  }
+}
+
 template <int BINARY_PRECISION> class RealDetails {
 private:
   // Converts bit widths to whole decimal digits
diff --git a/flang/include/flang/Decimal/binary-floating-point.h b/flang/include/flang/Decimal/binary-floating-point.h
index 24c23b0ce5ce6..b2ff4197ce701 100644
--- a/flang/include/flang/Decimal/binary-floating-point.h
+++ b/flang/include/flang/Decimal/binary-floating-point.h
@@ -48,6 +48,7 @@ class BinaryFloatingPointNumber : public common::RealDetails<BINARY_PRECISION> {
       const BinaryFloatingPointNumber &that) = default;
   constexpr BinaryFloatingPointNumber &operator=(
       BinaryFloatingPointNumber &&that) = default;
+  constexpr explicit BinaryFloatingPointNumber(RawType raw) : raw_{raw} {}

   RawType raw() const { return raw_; }
diff --git a/flang/include/flang/Evaluate/type.h b/flang/include/flang/Evaluate/type.h
index 183cb6de2781b..0619f9290cbf9 100644
--- a/flang/include/flang/Evaluate/type.h
+++ b/flang/include/flang/Evaluate/type.h
@@ -24,6 +24,7 @@
 #include "real.h"
 #include "flang/Common/Fortran.h"
 #include "flang/Common/idioms.h"
+#include "flang/Common/real.h"
 #include "flang/Common/template.h"
 #include <cinttypes>
 #include <optional>
@@ -235,51 +236,13 @@ class Type<TypeCategory::Integer, KIND>
   using Scalar = value::Integer<8 * KIND>;
 };

-// REAL(KIND=2) is IEEE half-precision (16 bits)
-template <>
-class Type<TypeCategory::Real, 2> : public TypeBase<TypeCategory::Real, 2> {
-public:
-  using Scalar =
-      value::Real<typename Type<TypeCategory::Integer, 2>::Scalar, 11>;
-};
-
-// REAL(KIND=3) identifies the "other" half-precision format, which is
-// basically REAL(4) without its least-order 16 fraction bits.
-template <>
-class Type<TypeCategory::Real, 3> : public TypeBase<TypeCategory::Real, 3> {
-public:
-  using Scalar =
-      value::Real<typename Type<TypeCategory::Integer, 2>::Scalar, 8>;
-};
-
-// REAL(KIND=4) is IEEE-754 single precision (32 bits)
-template <>
-class Type<TypeCategory::Real, 4> : public TypeBase<TypeCategory::Real, 4> {
-public:
-  using Scalar =
-      value::Real<typename Type<TypeCategory::Integer, 4>::Scalar, 24>;
-};
-
-// REAL(KIND=8) is IEEE double precision (64 bits)
-template <>
-class Type<TypeCategory::Real, 8> : public TypeBase<TypeCategory::Real, 8> {
-public:
-  using Scalar =
-      value::Real<typename Type<TypeCategory::Integer, 8>::Scalar, 53>;
-};
-
-// REAL(KIND=10) is x87 FPU extended precision (80 bits, all explicit)
-template <>
-class Type<TypeCategory::Real, 10> : public TypeBase<TypeCategory::Real, 10> {
-public:
-  using Scalar = value::Real<value::Integer<80>, 64>;
-};
-
-// REAL(KIND=16) is IEEE quad precision (128 bits)
-template <>
-class Type<TypeCategory::Real, 16> : public TypeBase<TypeCategory::Real, 16> {
+template <int KIND>
+class Type<TypeCategory::Real, KIND>
+    : public TypeBase<TypeCategory::Real, KIND> {
 public:
-  using Scalar = value::Real<value::Integer<128>, 113>;
+  static constexpr int precision{common::PrecisionOfRealKind(KIND)};
+  static constexpr int bits{common::BitsForBinaryPrecision(precision)};
+  using Scalar = value::Real<value::Integer<bits>, precision>;
 };

 // The KIND type parameter on COMPLEX is the kind of each of its components.
diff --git a/flang/runtime/descriptor-io.h b/flang/runtime/descriptor-io.h
index 22552f27c1699..f98797d78b509 100644
--- a/flang/runtime/descriptor-io.h
+++ b/flang/runtime/descriptor-io.h
@@ -61,21 +61,22 @@ inline bool FormattedIntegerIO(
   return true;
 }

-template <int PRECISION, typename A, Direction DIR>
+template <int KIND, Direction DIR>
 inline bool FormattedRealIO(
     IoStatementState &io, const Descriptor &descriptor) {
   std::size_t numElements{descriptor.Elements()};
   SubscriptValue subscripts[maxRank];
   descriptor.GetLowerBounds(subscripts);
+  using RawType = typename RealOutputEditing<KIND>::BinaryFloatingPoint;
   for (std::size_t j{0}; j < numElements; ++j) {
     if (auto edit{io.GetNextDataEdit()}) {
-      A &x{ExtractElement<A>(io, descriptor, subscripts)};
+      RawType &x{ExtractElement<RawType>(io, descriptor, subscripts)};
       if constexpr (DIR == Direction::Output) {
-        if (!RealOutputEditing<PRECISION>{io, x}.Edit(*edit)) {
+        if (!RealOutputEditing<KIND>{io, x}.Edit(*edit)) {
           return false;
         }
       } else if (edit->descriptor != DataEdit::ListDirectedNullValue) {
-        if (!EditRealInput<PRECISION>(io, *edit, reinterpret_cast<void *>(&x))) {
+        if (!EditRealInput<KIND>(io, *edit, reinterpret_cast<void *>(&x))) {
           return false;
         }
       }
@@ -90,7 +91,7 @@ inline bool FormattedRealIO(
   return true;
 }

-template <int PRECISION, typename A, Direction DIR>
+template <int KIND, Direction DIR>
 inline bool FormattedComplexIO(
     IoStatementState &io, const Descriptor &descriptor) {
   std::size_t numElements{descriptor.Elements()};
@@ -98,14 +99,15 @@ inline bool FormattedComplexIO(
   descriptor.GetLowerBounds(subscripts);
   bool isListOutput{
       io.get_if<ListDirectedStatementState<Direction::Output>>() != nullptr};
+  using RawType = typename RealOutputEditing<KIND>::BinaryFloatingPoint;
   for (std::size_t j{0}; j < numElements; ++j) {
-    A *x{&ExtractElement<A>(io, descriptor, subscripts)};
+    RawType *x{&ExtractElement<RawType>(io, descriptor, subscripts)};
     if (isListOutput) {
       DataEdit rEdit, iEdit;
       rEdit.descriptor = DataEdit::ListDirectedRealPart;
       iEdit.descriptor = DataEdit::ListDirectedImaginaryPart;
-      if (!RealOutputEditing<PRECISION>{io, x[0]}.Edit(rEdit) ||
-          !RealOutputEditing<PRECISION>{io, x[1]}.Edit(iEdit)) {
+      if (!RealOutputEditing<KIND>{io, x[0]}.Edit(rEdit) ||
+          !RealOutputEditing<KIND>{io, x[1]}.Edit(iEdit)) {
         return false;
       }
     } else {
@@ -114,12 +116,12 @@
       if (!edit) {
         return false;
       } else if constexpr (DIR == Direction::Output) {
-        if (!RealOutputEditing<PRECISION>{io, *x}.Edit(*edit)) {
+        if (!RealOutputEditing<KIND>{io, *x}.Edit(*edit)) {
           return false;
         }
       } else if (edit->descriptor == DataEdit::ListDirectedNullValue) {
         break;
-      } else if (!EditRealInput<PRECISION>(
+      } else if (!EditRealInput<KIND>(
                      io, *edit, reinterpret_cast<void *>(x))) {
         return false;
       }
     }
@@ -275,18 +277,19 @@ static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) {
   }
   case TypeCategory::Real:
     switch (kind) {
+    case 2:
+      return FormattedRealIO<2, DIR>(io, descriptor);
+    case 3:
+      return FormattedRealIO<3, DIR>(io, descriptor);
     case 4:
-      return FormattedRealIO<24, float, DIR>(io, descriptor);
+      return FormattedRealIO<4, DIR>(io, descriptor);
     case 8:
-      return FormattedRealIO<53, double, DIR>(io, descriptor);
-#if __x86_64__
+      return FormattedRealIO<8, DIR>(io, descriptor);
     case 10:
-      return FormattedRealIO<64, long double, DIR>(io, descriptor);
-#else
+      return FormattedRealIO<10, DIR>(io, descriptor);
+    // TODO: case double/double
     case 16:
-      return FormattedRealIO<113, long double, DIR>(io, descriptor);
-#endif
-    // TODO cases 2, 3
+      return FormattedRealIO<16, DIR>(io, descriptor);
     default:
       io.GetIoErrorHandler().Crash(
          "DescriptorIO: Unimplemented REAL kind (%d) in descriptor", kind);
@@ -294,18 +297,19 @@ static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) {
   }
   case TypeCategory::Complex:
     switch (kind) {
+    case 2:
+      return FormattedComplexIO<2, DIR>(io, descriptor);
+    case 3:
+      return FormattedComplexIO<3, DIR>(io, descriptor);
     case 4:
-      return FormattedComplexIO<24, float, DIR>(io, descriptor);
+      return FormattedComplexIO<4, DIR>(io, descriptor);
     case 8:
-      return FormattedComplexIO<53, double, DIR>(io, descriptor);
-#if __x86_64__
+      return FormattedComplexIO<8, DIR>(io, descriptor);
     case 10:
-      return FormattedComplexIO<64, long double, DIR>(io, descriptor);
-#else
+      return FormattedComplexIO<10, DIR>(io, descriptor);
+    // TODO: case double/double
     case 16:
-      return FormattedComplexIO<113, long double, DIR>(io, descriptor);
-#endif
-    // TODO cases 2, 3
+      return FormattedComplexIO<16, DIR>(io, descriptor);
     default:
       io.GetIoErrorHandler().Crash(
          "DescriptorIO: Unimplemented COMPLEX kind (%d) in descriptor",
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index da281aa68e435..08693f251b07f 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -260,8 +260,9 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
   return got;
 }

-template <int binaryPrecision>
+template <int KIND>
 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
+  constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
   static constexpr int maxDigits{
       common::MaxDecimalConversionDigits(binaryPrecision)};
   static constexpr int bufferSize{maxDigits + 18};
@@ -294,8 +295,9 @@
   return true;
 }

-template <int binaryPrecision>
+template <int KIND>
 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
+  constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
   switch (edit.descriptor) {
   case DataEdit::ListDirected:
   case DataEdit::ListDirectedRealPart:
@@ -304,7 +306,7 @@ bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
   case 'E': // incl. EN, ES, & EX
   case 'D':
   case 'G':
-    return EditCommonRealInput<binaryPrecision>(io, edit, n);
+    return EditCommonRealInput<KIND>(io, edit, n);
   case 'B':
     return EditBOZInput(
         io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
@@ -459,10 +461,11 @@ bool EditDefaultCharacterInput(
   return true;
 }

+template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
+template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
+template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
-template bool EditRealInput<11>(IoStatementState &, const DataEdit &, void *);
-template bool EditRealInput<24>(IoStatementState &, const DataEdit &, void *);
-template bool EditRealInput<53>(IoStatementState &, const DataEdit &, void *);
-template bool EditRealInput<64>(IoStatementState &, const DataEdit &, void *);
-template bool EditRealInput<113>(IoStatementState &, const DataEdit &, void *);
+template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
+// TODO: double/double
+template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);

 } // namespace Fortran::runtime::io
diff --git a/flang/runtime/edit-input.h b/flang/runtime/edit-input.h
index c628b9c7e6737..a8b0e76cfefd4 100644
--- a/flang/runtime/edit-input.h
+++ b/flang/runtime/edit-input.h
@@ -17,24 +17,25 @@ namespace Fortran::runtime::io {

 bool EditIntegerInput(IoStatementState &, const DataEdit &, void *, int kind);

-template <int binaryPrecision>
+template <int KIND>
 bool EditRealInput(IoStatementState &, const DataEdit &, void *);

 bool EditLogicalInput(IoStatementState &, const DataEdit &, bool &);
 bool EditDefaultCharacterInput(
     IoStatementState &, const DataEdit &, char *, std::size_t);

-extern template bool EditRealInput<8>(
+extern template bool EditRealInput<2>(
     IoStatementState &, const DataEdit &, void *);
-extern template bool EditRealInput<11>(
+extern template bool EditRealInput<3>(
     IoStatementState &, const DataEdit &, void *);
-extern template bool EditRealInput<24>(
+extern template bool EditRealInput<4>(
     IoStatementState &, const DataEdit &, void *);
-extern template bool EditRealInput<53>(
+extern template bool EditRealInput<8>(
     IoStatementState &, const DataEdit &, void *);
-extern template bool EditRealInput<64>(
+extern template bool EditRealInput<10>(
     IoStatementState &, const DataEdit &, void *);
-extern template bool EditRealInput<113>(
+// TODO: double/double
+extern template bool EditRealInput<16>(
     IoStatementState &, const DataEdit &, void *);

 } // namespace Fortran::runtime::io
 #endif // FORTRAN_RUNTIME_EDIT_INPUT_H_
diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp
index 145e01044144c..31ba9f152d74f 100644
--- a/flang/runtime/edit-output.cpp
+++ b/flang/runtime/edit-output.cpp
@@ -495,10 +495,11 @@ template bool EditIntegerOutput(
 template bool EditIntegerOutput(
     IoStatementState &, const DataEdit &, common::uint128_t);

+template class RealOutputEditing<2>;
+template class RealOutputEditing<3>;
+template class RealOutputEditing<4>;
 template class RealOutputEditing<8>;
-template class RealOutputEditing<11>;
-template class RealOutputEditing<24>;
-template class RealOutputEditing<53>;
-template class RealOutputEditing<64>;
-template class RealOutputEditing<113>;
+template class RealOutputEditing<10>;
+// TODO: double/double
+template class RealOutputEditing<16>;

 } // namespace Fortran::runtime::io
diff --git a/flang/runtime/edit-output.h b/flang/runtime/edit-output.h
index d819c1007cafb..251dc797f5e09 100644
--- a/flang/runtime/edit-output.h
+++ b/flang/runtime/edit-output.h
@@ -60,18 +60,17 @@ class RealOutputEditingBase {
   char exponent_[16];
 };

-template <int binaryPrecision>
-class RealOutputEditing : public RealOutputEditingBase {
+template <int KIND> class RealOutputEditing : public RealOutputEditingBase {
 public:
+  static constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
+  using BinaryFloatingPoint =
+      decimal::BinaryFloatingPointNumber<binaryPrecision>;
   template <typename A>
   RealOutputEditing(IoStatementState &io, A x)
       : RealOutputEditingBase{io}, x_{x} {}
   bool Edit(const DataEdit &);

 private:
-  using BinaryFloatingPoint =
-      decimal::BinaryFloatingPointNumber<binaryPrecision>;
-
   // The DataEdit arguments here are const references or copies so that
   // the original DataEdit can safely serve multiple array elements when
   // it has a repeat count.
@@ -104,12 +103,13 @@ extern template bool EditIntegerOutput(
 extern template bool EditIntegerOutput(
     IoStatementState &, const DataEdit &, common::uint128_t);

+extern template class RealOutputEditing<2>;
+extern template class RealOutputEditing<3>;
+extern template class RealOutputEditing<4>;
 extern template class RealOutputEditing<8>;
-extern template class RealOutputEditing<11>;
-extern template class RealOutputEditing<24>;
-extern template class RealOutputEditing<53>;
-extern template class RealOutputEditing<64>;
-extern template class RealOutputEditing<113>;
+extern template class RealOutputEditing<10>;
+// TODO: double/double
+extern template class RealOutputEditing<16>;

 } // namespace Fortran::runtime::io
 #endif // FORTRAN_RUNTIME_EDIT_OUTPUT_H_

From 4d5b1de40eccc7ffcfb859cef407e5f30bee77f8 Mon Sep 17 00:00:00 2001
From: Teresa Johnson
Date: Fri, 25 Sep 2020 23:03:06 -0700
Subject: [PATCH 278/321] [sanitizer] Skip stack symbolization when not
 required for print format

Adds a check to avoid symbolization when printing stack traces if the
stack_trace_format flag does not need it. While there is a symbolize
flag that can be turned off to skip some of the symbolization,
SymbolizePC() still unconditionally looks up the module name and offset.
Avoid invoking SymbolizePC() at all if not needed.

This is an efficiency improvement when dumping all stack traces as part
of the memory profiler in D87120, for large stripped apps where we want
to symbolize as a post pass.
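To make the format-scan idea concrete, here is a minimal standalone sketch
(the name FormatNeedsSymbolization is hypothetical; the function this patch
actually adds is RenderNeedsSymbolization): a stack_trace_format that
references only %%, the frame number (%n), and the raw PC (%p) can be
rendered without consulting the symbolizer at all.

    // Sketch: true iff `format` uses any specifier beyond %%, %n (frame
    // number), and %p (raw address); anything else (e.g. %f, %s, %l, %L)
    // requires looking up symbol information for the PC.
    static bool FormatNeedsSymbolization(const char *format) {
      for (const char *p = format; *p != '\0'; p++) {
        if (*p != '%')
          continue;
        p++; // Inspect the specifier character; reading '\0' here is safe.
        if (*p != '%' && *p != 'n' && *p != 'p')
          return true;
      }
      return false;
    }

With a format such as "frame:%n address:%p" (the case exercised by the new
print-stack-trace.cpp RUN line below), the scan finds nothing that needs
symbols, so SymbolizePC() is never invoked.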
Differential Revision: https://reviews.llvm.org/D88361 --- compiler-rt/lib/hwasan/hwasan_report.cpp | 2 +- .../sanitizer_stacktrace_libcdep.cpp | 20 ++++- .../sanitizer_stacktrace_printer.cpp | 89 ++++++++++++------- .../sanitizer_stacktrace_printer.h | 4 +- .../sanitizer_symbolizer_report.cpp | 3 +- .../sanitizer_stacktrace_printer_test.cpp | 33 +++---- compiler-rt/lib/tsan/rtl/tsan_report.cpp | 3 +- .../TestCases/print-stack-trace.cpp | 5 ++ 8 files changed, 104 insertions(+), 55 deletions(-) diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp index 206aa601903eb..0be7deeaee1a0 100644 --- a/compiler-rt/lib/hwasan/hwasan_report.cpp +++ b/compiler-rt/lib/hwasan/hwasan_report.cpp @@ -224,7 +224,7 @@ static void PrintStackAllocations(StackAllocationsRingBuffer *sa, frame_desc.append(" record_addr:0x%zx record:0x%zx", reinterpret_cast(record_addr), record); if (SymbolizedStack *frame = Symbolizer::GetOrInit()->SymbolizePC(pc)) { - RenderFrame(&frame_desc, " %F %L\n", 0, frame->info, + RenderFrame(&frame_desc, " %F %L\n", 0, frame->info.address, &frame->info, common_flags()->symbolize_vs_style, common_flags()->strip_path_prefix); frame->ClearAll(); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp index 68bd0bb296292..7808ba9b0f572 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp @@ -26,17 +26,23 @@ void StackTrace::Print() const { InternalScopedString frame_desc(GetPageSizeCached() * 2); InternalScopedString dedup_token(GetPageSizeCached()); int dedup_frames = common_flags()->dedup_token_length; + bool symbolize = RenderNeedsSymbolization(common_flags()->stack_trace_format); uptr frame_num = 0; for (uptr i = 0; i < size && trace[i]; i++) { // PCs in stack traces are actually the return addresses, that is, // addresses of the next instructions after the call. uptr pc = GetPreviousInstructionPc(trace[i]); - SymbolizedStack *frames = Symbolizer::GetOrInit()->SymbolizePC(pc); + SymbolizedStack *frames; + if (symbolize) + frames = Symbolizer::GetOrInit()->SymbolizePC(pc); + else + frames = SymbolizedStack::New(pc); CHECK(frames); for (SymbolizedStack *cur = frames; cur; cur = cur->next) { frame_desc.clear(); RenderFrame(&frame_desc, common_flags()->stack_trace_format, frame_num++, - cur->info, common_flags()->symbolize_vs_style, + cur->info.address, symbolize ? &cur->info : nullptr, + common_flags()->symbolize_vs_style, common_flags()->strip_path_prefix); Printf("%s\n", frame_desc.data()); if (dedup_frames-- > 0) { @@ -108,7 +114,12 @@ void __sanitizer_symbolize_pc(uptr pc, const char *fmt, char *out_buf, uptr out_buf_size) { if (!out_buf_size) return; pc = StackTrace::GetPreviousInstructionPc(pc); - SymbolizedStack *frame = Symbolizer::GetOrInit()->SymbolizePC(pc); + SymbolizedStack *frame; + bool symbolize = RenderNeedsSymbolization(fmt); + if (symbolize) + frame = Symbolizer::GetOrInit()->SymbolizePC(pc); + else + frame = SymbolizedStack::New(pc); if (!frame) { internal_strncpy(out_buf, "", out_buf_size); out_buf[out_buf_size - 1] = 0; @@ -121,7 +132,8 @@ void __sanitizer_symbolize_pc(uptr pc, const char *fmt, char *out_buf, for (SymbolizedStack *cur = frame; cur && out_buf < out_end; cur = cur->next) { frame_desc.clear(); - RenderFrame(&frame_desc, fmt, frame_num++, cur->info, + RenderFrame(&frame_desc, fmt, frame_num++, cur->info.address, + symbolize ? 
&cur->info : nullptr, common_flags()->symbolize_vs_style, common_flags()->strip_path_prefix); if (!frame_desc.length()) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp index 150ff475316bd..97755ca1c560c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp @@ -107,8 +107,14 @@ static const char *DemangleFunctionName(const char *function) { static const char kDefaultFormat[] = " #%n %p %F %L"; void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no, - const AddressInfo &info, bool vs_style, + uptr address, const AddressInfo *info, bool vs_style, const char *strip_path_prefix, const char *strip_func_prefix) { + // info will be null in the case where symbolization is not needed for the + // given format. This ensures that the code below will get a hard failure + // rather than print incorrect information in case RenderNeedsSymbolization + // ever ends up out of sync with this function. If non-null, the addresses + // should match. + CHECK(!info || address == info->address); if (0 == internal_strcmp(format, "DEFAULT")) format = kDefaultFormat; for (const char *p = format; *p != '\0'; p++) { @@ -126,71 +132,69 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no, buffer->append("%zu", frame_no); break; case 'p': - buffer->append("0x%zx", info.address); + buffer->append("0x%zx", address); break; case 'm': - buffer->append("%s", StripPathPrefix(info.module, strip_path_prefix)); + buffer->append("%s", StripPathPrefix(info->module, strip_path_prefix)); break; case 'o': - buffer->append("0x%zx", info.module_offset); + buffer->append("0x%zx", info->module_offset); break; case 'f': - buffer->append("%s", - DemangleFunctionName( - StripFunctionName(info.function, strip_func_prefix))); + buffer->append("%s", DemangleFunctionName(StripFunctionName( + info->function, strip_func_prefix))); break; case 'q': - buffer->append("0x%zx", info.function_offset != AddressInfo::kUnknown - ? info.function_offset + buffer->append("0x%zx", info->function_offset != AddressInfo::kUnknown + ? info->function_offset : 0x0); break; case 's': - buffer->append("%s", StripPathPrefix(info.file, strip_path_prefix)); + buffer->append("%s", StripPathPrefix(info->file, strip_path_prefix)); break; case 'l': - buffer->append("%d", info.line); + buffer->append("%d", info->line); break; case 'c': - buffer->append("%d", info.column); + buffer->append("%d", info->column); break; // Smarter special cases. case 'F': // Function name and offset, if file is unknown. - if (info.function) { - buffer->append("in %s", - DemangleFunctionName( - StripFunctionName(info.function, strip_func_prefix))); - if (!info.file && info.function_offset != AddressInfo::kUnknown) - buffer->append("+0x%zx", info.function_offset); + if (info->function) { + buffer->append("in %s", DemangleFunctionName(StripFunctionName( + info->function, strip_func_prefix))); + if (!info->file && info->function_offset != AddressInfo::kUnknown) + buffer->append("+0x%zx", info->function_offset); } break; case 'S': // File/line information. - RenderSourceLocation(buffer, info.file, info.line, info.column, vs_style, - strip_path_prefix); + RenderSourceLocation(buffer, info->file, info->line, info->column, + vs_style, strip_path_prefix); break; case 'L': // Source location, or module location. 
- if (info.file) { - RenderSourceLocation(buffer, info.file, info.line, info.column, + if (info->file) { + RenderSourceLocation(buffer, info->file, info->line, info->column, vs_style, strip_path_prefix); - } else if (info.module) { - RenderModuleLocation(buffer, info.module, info.module_offset, - info.module_arch, strip_path_prefix); + } else if (info->module) { + RenderModuleLocation(buffer, info->module, info->module_offset, + info->module_arch, strip_path_prefix); } else { buffer->append("()"); } break; case 'M': // Module basename and offset, or PC. - if (info.address & kExternalPCBit) - {} // There PCs are not meaningful. - else if (info.module) + if (address & kExternalPCBit) { + } // There PCs are not meaningful. + else if (info->module) // Always strip the module name for %M. - RenderModuleLocation(buffer, StripModuleName(info.module), - info.module_offset, info.module_arch, ""); + RenderModuleLocation(buffer, StripModuleName(info->module), + info->module_offset, info->module_arch, ""); else - buffer->append("(%p)", (void *)info.address); + buffer->append("(%p)", (void *)address); break; default: Report("Unsupported specifier in stack frame format: %c (0x%zx)!\n", *p, @@ -200,6 +204,29 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no, } } +bool RenderNeedsSymbolization(const char *format) { + if (0 == internal_strcmp(format, "DEFAULT")) + format = kDefaultFormat; + for (const char *p = format; *p != '\0'; p++) { + if (*p != '%') + continue; + p++; + switch (*p) { + case '%': + break; + case 'n': + // frame_no + break; + case 'p': + // address + break; + default: + return true; + } + } + return false; +} + void RenderData(InternalScopedString *buffer, const char *format, const DataInfo *DI, const char *strip_path_prefix) { for (const char *p = format; *p != '\0'; p++) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h index f7f7629f773f9..96119b2ee9e9f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h @@ -47,10 +47,12 @@ namespace __sanitizer { // module+offset if it is known, or () string. // %M - prints module basename and offset, if it is known, or PC. 
void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no, - const AddressInfo &info, bool vs_style, + uptr address, const AddressInfo *info, bool vs_style, const char *strip_path_prefix = "", const char *strip_func_prefix = ""); +bool RenderNeedsSymbolization(const char *format); + void RenderSourceLocation(InternalScopedString *buffer, const char *file, int line, int column, bool vs_style, const char *strip_path_prefix); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp index c8eb781dfc845..06301b83ea1f0 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp @@ -33,7 +33,8 @@ void ReportErrorSummary(const char *error_type, const AddressInfo &info, if (!common_flags()->print_summary) return; InternalScopedString buff(kMaxSummaryLength); buff.append("%s ", error_type); - RenderFrame(&buff, "%L %F", 0, info, common_flags()->symbolize_vs_style, + RenderFrame(&buff, "%L %F", 0, info.address, &info, + common_flags()->symbolize_vs_style, common_flags()->strip_path_prefix); ReportErrorSummary(buff.data(), alt_tool_name); } diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cpp index 1ce89a30cf69d..a98e47ab6c53b 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cpp @@ -79,10 +79,11 @@ TEST(SanitizerStacktracePrinter, RenderFrame) { InternalScopedString str(256); // Dump all the AddressInfo fields. - RenderFrame(&str, "%% Frame:%n PC:%p Module:%m ModuleOffset:%o " - "Function:%f FunctionOffset:%q Source:%s Line:%l " - "Column:%c", - frame_no, info, false, "/path/to/", "function_"); + RenderFrame(&str, + "%% Frame:%n PC:%p Module:%m ModuleOffset:%o " + "Function:%f FunctionOffset:%q Source:%s Line:%l " + "Column:%c", + frame_no, info.address, &info, false, "/path/to/", "function_"); EXPECT_STREQ("% Frame:42 PC:0x400000 Module:my/module ModuleOffset:0x200 " "Function:foo FunctionOffset:0x100 Source:my/source Line:10 " "Column:5", @@ -92,61 +93,61 @@ TEST(SanitizerStacktracePrinter, RenderFrame) { // Test special format specifiers. 
info.address = 0x400000; - RenderFrame(&str, "%M", frame_no, info, false); + RenderFrame(&str, "%M", frame_no, info.address, &info, false); EXPECT_NE(nullptr, internal_strstr(str.data(), "400000")); str.clear(); - RenderFrame(&str, "%L", frame_no, info, false); + RenderFrame(&str, "%L", frame_no, info.address, &info, false); EXPECT_STREQ("()", str.data()); str.clear(); info.module = internal_strdup("/path/to/module"); info.module_offset = 0x200; - RenderFrame(&str, "%M", frame_no, info, false); + RenderFrame(&str, "%M", frame_no, info.address, &info, false); EXPECT_NE(nullptr, internal_strstr(str.data(), "(module+0x")); EXPECT_NE(nullptr, internal_strstr(str.data(), "200")); str.clear(); - RenderFrame(&str, "%L", frame_no, info, false); + RenderFrame(&str, "%L", frame_no, info.address, &info, false); EXPECT_STREQ("(/path/to/module+0x200)", str.data()); str.clear(); info.function = internal_strdup("my_function"); - RenderFrame(&str, "%F", frame_no, info, false); + RenderFrame(&str, "%F", frame_no, info.address, &info, false); EXPECT_STREQ("in my_function", str.data()); str.clear(); info.function_offset = 0x100; - RenderFrame(&str, "%F %S", frame_no, info, false); + RenderFrame(&str, "%F %S", frame_no, info.address, &info, false); EXPECT_STREQ("in my_function+0x100 ", str.data()); str.clear(); info.file = internal_strdup("my_file"); - RenderFrame(&str, "%F %S", frame_no, info, false); + RenderFrame(&str, "%F %S", frame_no, info.address, &info, false); EXPECT_STREQ("in my_function my_file", str.data()); str.clear(); info.line = 10; - RenderFrame(&str, "%F %S", frame_no, info, false); + RenderFrame(&str, "%F %S", frame_no, info.address, &info, false); EXPECT_STREQ("in my_function my_file:10", str.data()); str.clear(); info.column = 5; - RenderFrame(&str, "%S %L", frame_no, info, false); + RenderFrame(&str, "%S %L", frame_no, info.address, &info, false); EXPECT_STREQ("my_file:10:5 my_file:10:5", str.data()); str.clear(); - RenderFrame(&str, "%S %L", frame_no, info, true); + RenderFrame(&str, "%S %L", frame_no, info.address, &info, true); EXPECT_STREQ("my_file(10,5) my_file(10,5)", str.data()); str.clear(); info.column = 0; - RenderFrame(&str, "%F %S", frame_no, info, true); + RenderFrame(&str, "%F %S", frame_no, info.address, &info, true); EXPECT_STREQ("in my_function my_file(10)", str.data()); str.clear(); info.line = 0; - RenderFrame(&str, "%F %S", frame_no, info, true); + RenderFrame(&str, "%F %S", frame_no, info.address, &info, true); EXPECT_STREQ("in my_function my_file", str.data()); str.clear(); diff --git a/compiler-rt/lib/tsan/rtl/tsan_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_report.cpp index 368f1ca8adf2c..4892c446c104b 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_report.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_report.cpp @@ -128,7 +128,8 @@ void PrintStack(const ReportStack *ent) { SymbolizedStack *frame = ent->frames; for (int i = 0; frame && frame->info.address; frame = frame->next, i++) { InternalScopedString res(2 * GetPageSizeCached()); - RenderFrame(&res, common_flags()->stack_trace_format, i, frame->info, + RenderFrame(&res, common_flags()->stack_trace_format, i, + frame->info.address, &frame->info, common_flags()->symbolize_vs_style, common_flags()->strip_path_prefix, kInterposedFunctionPrefix); Printf("%s\n", res.data()); diff --git a/compiler-rt/test/sanitizer_common/TestCases/print-stack-trace.cpp b/compiler-rt/test/sanitizer_common/TestCases/print-stack-trace.cpp index 0c0bdc03dac2d..9d7d03d81b531 100644 --- 
a/compiler-rt/test/sanitizer_common/TestCases/print-stack-trace.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/print-stack-trace.cpp @@ -2,6 +2,7 @@ // RUN: %clangxx -O3 %s -o %t && %env_tool_opts=stack_trace_format=DEFAULT %run %t 2>&1 | FileCheck %s // RUN: %env_tool_opts=stack_trace_format=frame%n_lineno%l %run %t 2>&1 | FileCheck %s --check-prefix=CUSTOM // RUN: %env_tool_opts=symbolize_inline_frames=false:stack_trace_format=DEFAULT %run %t 2>&1 | FileCheck %s --check-prefix=NOINLINE +// RUN: %env_tool_opts=stack_trace_format='"frame:%n address:%%p"' %run %t 2>&1 | FileCheck %s --check-prefix=NOSYMBOLIZE // UNSUPPORTED: darwin @@ -27,3 +28,7 @@ int main() { // NOINLINE: #0 0x{{.*}} in __sanitizer_print_stack_trace // NOINLINE: #1 0x{{.*}} in main{{.*}}print-stack-trace.cpp:[[@LINE-15]] + +// NOSYMBOLIZE: frame:0 address:{{0x.*}} +// NOSYMBOLIZE: frame:1 address:{{0x.*}} +// NOSYMBOLIZE: frame:2 address:{{0x.*}} From c10248829357fd90030ba091e01b6c253e5848f1 Mon Sep 17 00:00:00 2001 From: Dominic Chen Date: Fri, 25 Sep 2020 21:46:49 -0400 Subject: [PATCH 279/321] Add test for disabling Dead Virtual Function Elimination Differential Revision: https://reviews.llvm.org/D88349 --- .../virtual-function-elimination.cpp | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/clang/test/CodeGenCXX/virtual-function-elimination.cpp b/clang/test/CodeGenCXX/virtual-function-elimination.cpp index a89e6ebceeaf9..543537baff900 100644 --- a/clang/test/CodeGenCXX/virtual-function-elimination.cpp +++ b/clang/test/CodeGenCXX/virtual-function-elimination.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux -flto -flto-unit -fvirtual-function-elimination -fwhole-program-vtables -emit-llvm -o - %s | FileCheck %s - +// RUN: %clang -target x86_64-unknown-linux -flto -fvirtual-function-elimination -fno-virtual-function-elimination -fwhole-program-vtables -S -emit-llvm -o - %s | FileCheck %s -check-prefix=NOVFE struct __attribute__((visibility("default"))) A { virtual void foo(); @@ -8,9 +8,13 @@ struct __attribute__((visibility("default"))) A { void test_1(A *p) { // A has default visibility, so no need for type.checked.load. // CHECK-LABEL: define void @_Z6test_1P1A +// NOVFE-LABEL: define dso_local void @_Z6test_1P1A // CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** {{%.+}}, i64 0 +// NOVFE: [[FN_PTR_ADDR:%.+]] = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** {{%.+}}, i64 0 // CHECK: [[FN_PTR:%.+]] = load void (%struct.A*)*, void (%struct.A*)** [[FN_PTR_ADDR]] +// NOVFE: [[FN_PTR:%.+]] = load void (%struct.A*)*, void (%struct.A*)** [[FN_PTR_ADDR]] // CHECK: call void [[FN_PTR]]( +// NOVFE: call void [[FN_PTR]]( p->foo(); } @@ -22,9 +26,13 @@ struct __attribute__((visibility("hidden"))) [[clang::lto_visibility_public]] B void test_2(B *p) { // B has public LTO visibility, so no need for type.checked.load. 
// CHECK-LABEL: define void @_Z6test_2P1B +// NOVFE-LABEL: define dso_local void @_Z6test_2P1B // CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr inbounds void (%struct.B*)*, void (%struct.B*)** {{%.+}}, i64 0 +// NOVFE: [[FN_PTR_ADDR:%.+]] = getelementptr inbounds void (%struct.B*)*, void (%struct.B*)** {{%.+}}, i64 0 // CHECK: [[FN_PTR:%.+]] = load void (%struct.B*)*, void (%struct.B*)** [[FN_PTR_ADDR]] +// NOVFE: [[FN_PTR:%.+]] = load void (%struct.B*)*, void (%struct.B*)** [[FN_PTR_ADDR]] // CHECK: call void [[FN_PTR]]( +// NOVFE: call void [[FN_PTR]]( p->foo(); } @@ -37,10 +45,14 @@ struct __attribute__((visibility("hidden"))) C { void test_3(C *p) { // C has hidden visibility, so we generate type.checked.load to allow VFE. // CHECK-LABEL: define void @_Z6test_3P1C +// NOVFE-LABEL: define dso_local void @_Z6test_3P1C // CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* {{%.+}}, i32 0, metadata !"_ZTS1C") +// NOVFE: call i1 @llvm.type.test(i8* {{%.+}}, metadata !"_ZTS1C") // CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0 +// NOVFE: [[FN_PTR:%.+]] = load void (%struct.C*)*, void (%struct.C*)** {{%.+}}, align 8 // CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)* // CHECK: call void [[FN_PTR]]( +// NOVFE: call void [[FN_PTR]]( p->foo(); } @@ -48,10 +60,14 @@ void test_4(C *p) { // When using type.checked.load, we pass the vtable offset to the intrinsic, // rather than adding it to the pointer with a GEP. // CHECK-LABEL: define void @_Z6test_4P1C +// NOVFE-LABEL: define dso_local void @_Z6test_4P1C // CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* {{%.+}}, i32 8, metadata !"_ZTS1C") +// NOVFE: call i1 @llvm.type.test(i8* {{%.+}}, metadata !"_ZTS1C") // CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0 +// NOVFE: [[FN_PTR:%.+]] = load void (%struct.C*)*, void (%struct.C*)** {{%.+}}, align 8 // CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)* // CHECK: call void [[FN_PTR]]( +// NOVFE: call void [[FN_PTR]]( p->bar(); } @@ -64,12 +80,17 @@ void test_5(C *p, void (C::*q)(void)) { // function pointer to the intrinsic, this information would be lost. No // codegen changes on the non-virtual side. // CHECK-LABEL: define void @_Z6test_5P1CMS_FvvE( +// NOVFE-LABEL: define dso_local void @_Z6test_5P1CMS_FvvE( // CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr i8, i8* %vtable, i64 {{%.+}} // CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* [[FN_PTR_ADDR]], i32 0, metadata !"_ZTSM1CFvvE.virtual") +// NOVFE-NOT: call { i8*, i1 } @llvm.type.checked.load(i8* {{%.+}}, i32 0, metadata !"_ZTSM1CFvvE.virtual") // CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0 // CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)* +// NOVFE: [[FN_PTR:%.+]] = load void (%struct.C*)*, void (%struct.C*)** {{%.+}}, align 8 // CHECK: [[PHI:%.+]] = phi void (%struct.C*)* {{.*}}[ [[FN_PTR]], {{.*}} ] +// NOVFE: [[PHI:%.+]] = phi void (%struct.C*)* {{.*}}[ [[FN_PTR]], {{.*}} ] // CHECK: call void [[PHI]]( +// NOVFE: call void [[PHI]]( (p->*q)(); } From 68e1a8d20795802077987529e1268c184d749564 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 7 Oct 2020 14:20:18 -0700 Subject: [PATCH 280/321] [X86] Defer the creation of LCMPXCHG16B_SAVE_RBX until finalize-isel We need to use LCMPXCHG16B_SAVE_RBX if RBX/EBX is being used as the frame pointer. We previously checked for this during type legalization, but that's too early to know for sure if the base pointer is needed. 
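For context, the construct that reaches this lowering is a 16-byte atomic
compare-and-swap. A minimal sketch of such source follows (the Pair and
TryUpdate names are illustrative only; it assumes an x86-64 target with the
cx16 feature and a lock-free 16-byte std::atomic, otherwise the call would
go through libatomic instead of emitting the instruction inline):

    #include <atomic>

    // 16 bytes wide, so a lock-free compare_exchange maps to LOCK CMPXCHG16B,
    // which implicitly takes the expected value in RDX:RAX and the desired
    // value in RCX:RBX; hence the trouble when RBX is reserved as a base
    // pointer.
    struct alignas(16) Pair {
      long long lo;
      long long hi;
    };

    bool TryUpdate(std::atomic<Pair> &slot, Pair expected, Pair desired) {
      return slot.compare_exchange_strong(expected, desired);
    }

The PR42064 reproducer added below is essentially this pattern inside a
function whose frame also needs RBX as a base pointer.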
This patch adds a new pseudo instruction to emit from isel that uses a virtual register for the RBX input. Then we use the custom inserter hook to emit LCMPXCHG16B if RBX isn't needed as a base pointer or LCMPXCHG16B_SAVE_RBX if it is. Fixes PR42064. Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D88808 --- llvm/lib/Target/X86/X86ExpandPseudo.cpp | 4 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 71 ++++++++++-------- llvm/lib/Target/X86/X86ISelLowering.h | 1 - llvm/lib/Target/X86/X86InstrCompiler.td | 46 +++++++----- llvm/lib/Target/X86/X86InstrInfo.td | 14 +--- llvm/test/CodeGen/X86/pr42064.ll | 98 +++++++++++++++++++++++++ 6 files changed, 171 insertions(+), 63 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr42064.ll diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index a5a1a4ff93e6f..d9c0964e9ed83 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -338,9 +338,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Perform the following transformation. // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx // => - // [E|R]BX = InArg + // RBX = InArg // actualcmpxchg Addr - // [E|R]BX = SaveRbx + // RBX = SaveRbx const MachineOperand &InArg = MBBI->getOperand(6); Register SaveRbx = MBBI->getOperand(7).getReg(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 66986a1b9c108..b320df3fec904 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30481,38 +30481,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, swapInH = DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX, swapInH, cpInH.getValue(1)); - // If the current function needs the base pointer, RBX, - // we shouldn't use cmpxchg directly. - // Indeed the lowering of that instruction will clobber - // that register and since RBX will be a reserved register - // the register allocator will not make sure its value will - // be properly saved and restored around this live-range. - const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + + // In 64-bit mode we might need the base pointer in RBX, but we can't know + // until later. So we keep the RBX input in a vreg and use a custom + // inserter. + // Since RBX will be a reserved register the register allocator will not + // make sure its value will be properly saved and restored around this + // live-range. SDValue Result; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - Register BasePtr = TRI->getBaseRegister(); MachineMemOperand *MMO = cast(N)->getMemOperand(); - if (TRI->hasBasePointer(DAG.getMachineFunction()) && - (BasePtr == X86::RBX || BasePtr == X86::EBX)) { - assert(Regs64bit && "RBX/EBX base pointer only expected for i128 CAS"); - SDValue RBXSave = DAG.getCopyFromReg(swapInH.getValue(0), dl, - X86::RBX, - HalfT, swapInH.getValue(1)); - SDValue Ops[] = {/*Chain*/ RBXSave.getValue(1), N->getOperand(1), swapInL, - RBXSave, - /*Glue*/ RBXSave.getValue(2)}; - Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG16_SAVE_RBX_DAG, dl, Tys, - Ops, T, MMO); + if (Regs64bit) { + SDValue Ops[] = {swapInH.getValue(0), N->getOperand(1), swapInL, + swapInH.getValue(1)}; + Result = + DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG16_DAG, dl, Tys, Ops, T, MMO); } else { - unsigned Opcode = - Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG; - swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl, - Regs64bit ? 
X86::RBX : X86::EBX, swapInL, + swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl, X86::EBX, swapInL, swapInH.getValue(1)); SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1), swapInL.getValue(1)}; - Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO); + Result = + DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, T, MMO); } + SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, Regs64bit ? X86::RAX : X86::EAX, HalfT, Result.getValue(1)); @@ -30811,7 +30803,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(LCMPXCHG_DAG) NODE_NAME_CASE(LCMPXCHG8_DAG) NODE_NAME_CASE(LCMPXCHG16_DAG) - NODE_NAME_CASE(LCMPXCHG8_SAVE_EBX_DAG) NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG) NODE_NAME_CASE(LADD) NODE_NAME_CASE(LSUB) @@ -33770,11 +33761,31 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, return BB; } - case X86::LCMPXCHG16B: - return BB; - case X86::LCMPXCHG16B_SAVE_RBX: { - if (!BB->isLiveIn(X86::RBX)) - BB->addLiveIn(X86::RBX); + case X86::LCMPXCHG16B_NO_RBX: { + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + Register BasePtr = TRI->getBaseRegister(); + X86AddressMode AM = getAddressFromInstr(&MI, 0); + if (TRI->hasBasePointer(*MF) && + (BasePtr == X86::RBX || BasePtr == X86::EBX)) { + if (!BB->isLiveIn(BasePtr)) + BB->addLiveIn(BasePtr); + // Save RBX into a virtual register. + Register SaveRBX = + MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX) + .addReg(X86::RBX); + Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); + addFullAddress( + BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst), AM) + .add(MI.getOperand(X86::AddrNumOperands)) + .addReg(SaveRBX); + } else { + // Simple case, just copy the virtual register to RBX. + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX) + .add(MI.getOperand(X86::AddrNumOperands)); + addFullAddress(BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B)), AM); + } + MI.eraseFromParent(); return BB; } case X86::MWAITX: { diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index f9bf6fb988ebe..24b2c8fd31c25 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -756,7 +756,6 @@ namespace llvm { LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, LCMPXCHG16_DAG, - LCMPXCHG8_SAVE_EBX_DAG, LCMPXCHG16_SAVE_RBX_DAG, /// LOCK-prefixed arithmetic read-modify-write instructions. diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 195ea8b1b1272..b0e4bd1ee761b 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -809,15 +809,6 @@ let Predicates = [UseIncDec] in { } // Atomic compare and swap. 
-multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
-                         SDPatternOperator frag, X86MemOperand x86memop> {
-let isCodeGenOnly = 1, usesCustomInserter = 1 in {
-  def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
-               !strconcat(mnemonic, "\t$ptr"),
-               [(frag addr:$ptr)]>, TB, LOCK;
-}
-}
-
 multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
                           string mnemonic, SDPatternOperator frag> {
 let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
@@ -841,14 +832,19 @@ let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
 }

 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
-    Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW] in {
-defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
+    Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW],
+    isCodeGenOnly = 1, usesCustomInserter = 1 in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
+                   "cmpxchg8b\t$ptr",
+                   [(X86cas8 addr:$ptr)]>, TB, LOCK;
 }

 let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
-    Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW] in {
-defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
-                                 X86cas16, i128mem>, REX_W;
+    Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
+    isCodeGenOnly = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
+def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
+                     "cmpxchg16b\t$ptr",
+                     []>, TB, LOCK;
 }

 // This pseudo must be used when the frame uses RBX as
@@ -872,14 +868,24 @@ defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
 // the value of RBX.
 let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
     Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
-    isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",
-    usesCustomInserter = 1 in {
+    isCodeGenOnly = 1, isPseudo = 1,
+    mayLoad = 1, mayStore = 1, hasSideEffects = 0,
+    Constraints = "$rbx_save = $dst" in {
 def LCMPXCHG16B_SAVE_RBX :
     I<0, Pseudo, (outs GR64:$dst),
-      (ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save),
-      !strconcat("cmpxchg16b", "\t$ptr"),
-      [(set GR64:$dst, (X86cas16save_rbx addr:$ptr, GR64:$rbx_input,
-                                         GR64:$rbx_save))]>;
+      (ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save), "", []>;
 }
+
+// Pseudo instruction that doesn't read/write RBX. Will be turned into either
+// LCMPXCHG16B_SAVE_RBX or LCMPXCHG16B via a custom inserter.
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RCX, RDX], + Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW], + isCodeGenOnly = 1, isPseudo = 1, + mayLoad = 1, mayStore = 1, hasSideEffects = 0, + usesCustomInserter = 1 in { +def LCMPXCHG16B_NO_RBX : + I<0, Pseudo, (outs), (ins i128mem:$ptr, GR64:$rbx_input), "", + [(X86cas16 addr:$ptr, GR64:$rbx_input)]>; } // This pseudo must be used when the frame uses RBX/EBX as diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index ada5c2ffdc0bb..5251998b2b5a2 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -69,10 +69,8 @@ def SDTX86wrpkru : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; -def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; -def SDTX86caspairSaveRbx16 : SDTypeProfile<1, 3, - [SDTCisVT<0, i64>, SDTCisPtrTy<1>, - SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; +def SDTX86cas8pair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDTX86cas16pair : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i64>]>; def SDTLockBinaryArithWithFlags : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, @@ -171,16 +169,12 @@ def X86wrpkru : SDNode<"X86ISD::WRPKRU", SDTX86wrpkru, def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair, +def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8pair, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair, +def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86cas16pair, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def X86cas16save_rbx : SDNode<"X86ISD::LCMPXCHG16_SAVE_RBX_DAG", - SDTX86caspairSaveRbx16, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue, - SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; diff --git a/llvm/test/CodeGen/X86/pr42064.ll b/llvm/test/CodeGen/X86/pr42064.ll new file mode 100644 index 0000000000000..6269a59ff055e --- /dev/null +++ b/llvm/test/CodeGen/X86/pr42064.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc19.11.0 -mattr=+avx,+cx16 | FileCheck %s + +%struct.TestStruct = type { %union.Int128 } +%union.Int128 = type { i128 } +%struct.SomeArrays = type { %struct.SillyArray, %struct.SillyArray, %struct.SillyArray } +%struct.SillyArray = type { i8*, i32, i32 } + +declare void @llvm.lifetime.start.p0i8(i64, i8*) + +define void @foo(%struct.TestStruct* %arg) align 2 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { +; Check that %rbx is being used for a frame pointer +; CHECK-LABEL: foo: +; CHECK: movq %rsp, %rbx + +; Check that %rbx is saved and restored around both lock cmpxchg16b. 
+; CHECK: movq %rbx, %r9 +; CHECK-NEXT: movabsq $1393743441367457520, %rcx # imm = 0x135792468ABCDEF0 +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: movq %rcx, %rbx +; CHECK-NEXT: lock cmpxchg16b (%r8) +; CHECK-NEXT: movq %r9, %rbx + +; CHECK: movq %rbx, %r9 +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: movq %rcx, %rbx +; CHECK-NEXT: lock cmpxchg16b (%r8) +; CHECK-NEXT: movq %r9, %rbx +bb: + %i = alloca %struct.SomeArrays, align 8 + %i1 = alloca %struct.SomeArrays, align 8 + %i2 = getelementptr inbounds %struct.TestStruct, %struct.TestStruct* %arg, i64 0, i32 0, i32 0 + %i3 = cmpxchg i128* %i2, i128 25710028567316702934644703134494809840, i128 25710028567316702934644703134494809840 seq_cst seq_cst + %i4 = extractvalue { i128, i1 } %i3, 0 + %i5 = trunc i128 %i4 to i64 + %i6 = icmp eq i64 %i5, 0 + br i1 %i6, label %bb9, label %bb7 + +bb7: ; preds = %bb + %i8 = cmpxchg i128* %i2, i128 25710028567316702934644703134494809840, i128 25710028567316702934644703134494809840 seq_cst seq_cst + br label %bb9 + +bb9: ; preds = %bb7, %bb + %i10 = bitcast %struct.SomeArrays* %i to i8* + call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %i10) + call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %i10, i8 0, i64 48, i1 false) + %i11 = bitcast %struct.SomeArrays* %i1 to i8* + call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %i11) + %i12 = bitcast %struct.SomeArrays* %i1 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %i12, i8 0, i64 48, i1 false) + %i13 = invoke nonnull align 8 dereferenceable(48) %struct.SomeArrays* @"??4SomeArrays@@QEAAAEAU0@$$QEAU0@@Z"(%struct.SomeArrays* nonnull %i, %struct.SomeArrays* nonnull align 8 dereferenceable(48) %i1) + to label %bb14 unwind label %bb45 + +bb14: ; preds = %bb9 + call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %i10) + ret void + +bb45: ; preds = %bb9 + %i46 = cleanuppad within none [] + %i47 = getelementptr inbounds %struct.SomeArrays, %struct.SomeArrays* %i1, i64 0, i32 2, i32 0 + %i48 = load i8*, i8** %i47, align 8 + invoke void @"?free@@YAXPEAX@Z"(i8* %i48) [ "funclet"(token %i46) ] + to label %bb51 unwind label %bb49 + +bb49: ; preds = %bb45 + %i50 = cleanuppad within %i46 [] + call void @__std_terminate() [ "funclet"(token %i50) ] + unreachable + +bb51: ; preds = %bb45 + %i52 = getelementptr inbounds %struct.SomeArrays, %struct.SomeArrays* %i1, i64 0, i32 1, i32 0 + %i53 = load i8*, i8** %i52, align 8 + invoke void @"?free@@YAXPEAX@Z"(i8* %i53) [ "funclet"(token %i46) ] + to label %bb56 unwind label %bb54 + +bb54: ; preds = %bb51 + %i55 = cleanuppad within %i46 [] + call void @__std_terminate() [ "funclet"(token %i55) ] + unreachable + +bb56: ; preds = %bb51 + call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %i10) + cleanupret from %i46 unwind to caller +} + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) + +declare dso_local i32 @__CxxFrameHandler3(...) 
+ +declare nonnull align 8 dereferenceable(48) %struct.SomeArrays* @"??4SomeArrays@@QEAAAEAU0@$$QEAU0@@Z"(%struct.SomeArrays*, %struct.SomeArrays* nonnull align 8 dereferenceable(48)) align 2 + +declare void @"?free@@YAXPEAX@Z"(i8*) + +declare void @__std_terminate() From 35cb45c533fb76dcfc9f44b4e8bbd5d8a855ed2a Mon Sep 17 00:00:00 2001 From: Anna Thomas Date: Thu, 24 Sep 2020 11:00:46 -0400 Subject: [PATCH 281/321] [ImplicitNullChecks] Support complex addressing mode The pass is updated to handle loads through complex addressing mode, specifically, when we have a scaled register and a scale. It requires two API updates in TII which have been implemented for X86. See added IR and MIR testcases. Tests-Run: make check Reviewed-By: reames, danstrushin Differential Revision: https://reviews.llvm.org/D87148 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 28 ++++++ llvm/lib/CodeGen/ImplicitNullChecks.cpp | 92 +++++++++++++++++-- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 18 ++++ llvm/lib/Target/AArch64/AArch64InstrInfo.h | 4 + llvm/lib/Target/X86/X86InstrInfo.cpp | 39 ++++++++ llvm/lib/Target/X86/X86InstrInfo.h | 7 ++ .../X86/implicit-null-check-negative.ll | 20 ++++ llvm/test/CodeGen/X86/implicit-null-check.ll | 34 ++++++- .../test/CodeGen/X86/implicit-null-checks.mir | 45 +++++++++ 9 files changed, 273 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 0629c81d4f4f8..f00741530b8f4 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -80,6 +80,15 @@ struct RegImmPair { RegImmPair(Register Reg, int64_t Imm) : Reg(Reg), Imm(Imm) {} }; +/// Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare. +/// It holds the register values, the scale value and the displacement. +struct ExtAddrMode { + Register BaseReg; + Register ScaledReg; + int64_t Scale; + int64_t Displacement; +}; + //--------------------------------------------------------------------------- /// /// TargetInstrInfo - Interface to description of machine instruction set @@ -968,6 +977,15 @@ class TargetInstrInfo : public MCInstrInfo { return None; } + /// Returns true if MI is an instruction that defines Reg to have a constant + /// value and the value is recorded in ImmVal. The ImmVal is a result that + /// should be interpreted as modulo size of Reg. + virtual bool getConstValDefinedInReg(const MachineInstr &MI, + const Register Reg, + int64_t &ImmVal) const { + return false; + } + /// Store the specified register of the given register class to the specified /// stack frame index. The store instruction is to be added to the given /// machine basic block before the specified machine instruction. If isKill @@ -1270,6 +1288,16 @@ class TargetInstrInfo : public MCInstrInfo { return false; } + /// Target dependent implementation to get the values constituting the address + /// MachineInstr that is accessing memory. These values are returned as a + /// struct ExtAddrMode which contains all relevant information to make up the + /// address. + virtual Optional + getAddrModeFromMemoryOp(const MachineInstr &MemI, + const TargetRegisterInfo *TRI) const { + return None; + } + /// Returns true if MI's Def is NullValueReg, and the MI /// does not change the Zero value. i.e. cases such as rax = shr rax, X where /// NullValueReg = rax. 
Note that if the NullValueReg is non-zero, this diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 65728adfeb0cd..20666f27aab9e 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -378,26 +378,100 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, if (MI.getDesc().getNumDefs() > 1) return SR_Unsuitable; - // FIXME: This handles only simple addressing mode. - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) + if (!MI.mayLoadOrStore() || MI.isPredicable()) + return SR_Unsuitable; + auto AM = TII->getAddrModeFromMemoryOp(MI, TRI); + if (!AM) return SR_Unsuitable; + auto AddrMode = *AM; + const Register BaseReg = AddrMode.BaseReg, ScaledReg = AddrMode.ScaledReg; + int64_t Displacement = AddrMode.Displacement; // We need the base of the memory instruction to be same as the register // where the null check is performed (i.e. PointerReg). - if (!BaseOp->isReg() || BaseOp->getReg() != PointerReg) + if (BaseReg != PointerReg && ScaledReg != PointerReg) return SR_Unsuitable; - - // Scalable offsets are a part of scalable vectors (SVE for AArch64). That - // target is in-practice unsupported for ImplicitNullChecks. - if (OffsetIsScalable) + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); + unsigned PointerRegSizeInBits = TRI->getRegSizeInBits(PointerReg, MRI); + // Bail out of the sizes of BaseReg, ScaledReg and PointerReg are not the + // same. + if ((BaseReg && + TRI->getRegSizeInBits(BaseReg, MRI) != PointerRegSizeInBits) || + (ScaledReg && + TRI->getRegSizeInBits(ScaledReg, MRI) != PointerRegSizeInBits)) return SR_Unsuitable; - if (!MI.mayLoadOrStore() || MI.isPredicable()) + // Returns true if RegUsedInAddr is used for calculating the displacement + // depending on addressing mode. Also calculates the Displacement. + auto CalculateDisplacementFromAddrMode = [&](Register RegUsedInAddr, + int64_t Multiplier) { + // The register can be NoRegister, which is defined as zero for all targets. + // Consider instruction of interest as `movq 8(,%rdi,8), %rax`. Here the + // ScaledReg is %rdi, while there is no BaseReg. + if (!RegUsedInAddr) + return false; + assert(Multiplier && "expected to be non-zero!"); + MachineInstr *ModifyingMI = nullptr; + for (auto It = std::next(MachineBasicBlock::const_reverse_iterator(&MI)); + It != MI.getParent()->rend(); It++) { + const MachineInstr *CurrMI = &*It; + if (CurrMI->modifiesRegister(RegUsedInAddr, TRI)) { + ModifyingMI = const_cast(CurrMI); + break; + } + } + if (!ModifyingMI) + return false; + // Check for the const value defined in register by ModifyingMI. This means + // all other previous values for that register has been invalidated. + int64_t ImmVal; + if (!TII->getConstValDefinedInReg(*ModifyingMI, RegUsedInAddr, ImmVal)) + return false; + // Calculate the reg size in bits, since this is needed for bailing out in + // case of overflow. + int32_t RegSizeInBits = TRI->getRegSizeInBits(RegUsedInAddr, MRI); + APInt ImmValC(RegSizeInBits, ImmVal, true /*IsSigned*/); + APInt MultiplierC(RegSizeInBits, Multiplier); + assert(MultiplierC.isStrictlyPositive() && + "expected to be a positive value!"); + bool IsOverflow; + // Sign of the product depends on the sign of the ImmVal, since Multiplier + // is always positive. 
+ APInt Product = ImmValC.smul_ov(MultiplierC, IsOverflow); + if (IsOverflow) + return false; + APInt DisplacementC(64, Displacement, true /*isSigned*/); + DisplacementC = Product.sadd_ov(DisplacementC, IsOverflow); + if (IsOverflow) + return false; + + // We only handle diplacements upto 64 bits wide. + if (DisplacementC.getActiveBits() > 64) + return false; + Displacement = DisplacementC.getSExtValue(); + return true; + }; + + // If a register used in the address is constant, fold it's effect into the + // displacement for ease of analysis. + bool BaseRegIsConstVal = false, ScaledRegIsConstVal = false; + if (CalculateDisplacementFromAddrMode(BaseReg, 1)) + BaseRegIsConstVal = true; + if (CalculateDisplacementFromAddrMode(ScaledReg, AddrMode.Scale)) + ScaledRegIsConstVal = true; + + // The register which is not null checked should be part of the Displacement + // calculation, otherwise we do not know whether the Displacement is made up + // by some symbolic values. + // This matters because we do not want to incorrectly assume that load from + // falls in the zeroth faulting page in the "sane offset check" below. + if ((BaseReg && BaseReg != PointerReg && !BaseRegIsConstVal) || + (ScaledReg && ScaledReg != PointerReg && !ScaledRegIsConstVal)) return SR_Unsuitable; // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. - if (!(-PageSize < Offset && Offset < PageSize)) + if (!(-PageSize < Displacement && Displacement < PageSize)) return SR_Unsuitable; // Finally, check whether the current memory access aliases with previous one. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 53ae3370c217c..654c748a145bf 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2144,6 +2144,24 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth( return true; } +Optional +AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI, + const TargetRegisterInfo *TRI) const { + const MachineOperand *Base; // Filled with the base operand of MI. + int64_t Offset; // Filled with the offset of MI. + bool OffsetIsScalable; + if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI)) + return None; + + if (!Base->isReg()) + return None; + ExtAddrMode AM; + AM.BaseReg = Base->getReg(); + AM.Displacement = Offset; + AM.ScaledReg = 0; + return AM; +} + bool AArch64InstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 1a21d8474e071..92e2747e64a36 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -113,6 +113,10 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// Hint that pairing the given load or store is unprofitable. 
static void suppressLdStPair(MachineInstr &MI); + Optional + getAddrModeFromMemoryOp(const MachineInstr &MemI, + const TargetRegisterInfo *TRI) const override; + bool getMemOperandsWithOffsetWidth( const MachineInstr &MI, SmallVectorImpl &BaseOps, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 1f4bf30cc1d02..56226bf78c05a 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3663,6 +3663,45 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, } } +Optional +X86InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI, + const TargetRegisterInfo *TRI) const { + const MCInstrDesc &Desc = MemI.getDesc(); + int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags); + if (MemRefBegin < 0) + return None; + + MemRefBegin += X86II::getOperandBias(Desc); + + auto &BaseOp = MemI.getOperand(MemRefBegin + X86::AddrBaseReg); + if (!BaseOp.isReg()) // Can be an MO_FrameIndex + return None; + + const MachineOperand &DispMO = MemI.getOperand(MemRefBegin + X86::AddrDisp); + // Displacement can be symbolic + if (!DispMO.isImm()) + return None; + + ExtAddrMode AM; + AM.BaseReg = BaseOp.getReg(); + AM.ScaledReg = MemI.getOperand(MemRefBegin + X86::AddrIndexReg).getReg(); + AM.Scale = MemI.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm(); + AM.Displacement = DispMO.getImm(); + return AM; +} + +bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI, + const Register Reg, + int64_t &ImmVal) const { + if (MI.getOpcode() != X86::MOV32ri && MI.getOpcode() != X86::MOV64ri) + return false; + // Mov Src can be a global address. + if (!MI.getOperand(1).isImm() || MI.getOperand(0).getReg() != Reg) + return false; + ImmVal = MI.getOperand(1).getImm(); + return true; +} + bool X86InstrInfo::preservesZeroValueInReg( const MachineInstr *MI, const Register NullValueReg, const TargetRegisterInfo *TRI) const { diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 215318105de45..d7d2370c6f678 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -317,6 +317,13 @@ class X86InstrInfo final : public X86GenInstrInfo { SmallVectorImpl &Cond, bool AllowModify) const override; + Optional + getAddrModeFromMemoryOp(const MachineInstr &MemI, + const TargetRegisterInfo *TRI) const override; + + bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, + int64_t &ImmVal) const override; + bool preservesZeroValueInReg(const MachineInstr *MI, const Register NullValueReg, const TargetRegisterInfo *TRI) const override; diff --git a/llvm/test/CodeGen/X86/implicit-null-check-negative.ll b/llvm/test/CodeGen/X86/implicit-null-check-negative.ll index d7eae8c98173a..da525b4548deb 100644 --- a/llvm/test/CodeGen/X86/implicit-null-check-negative.ll +++ b/llvm/test/CodeGen/X86/implicit-null-check-negative.ll @@ -129,4 +129,24 @@ define i64 @imp_null_check_load_shift_add_addr(i64* %x, i64 %r) { %t = load i64, i64* %x.loc ret i64 %t } + +; the memory op is not within faulting page. 
+define i64 @imp_null_check_load_addr_outside_faulting_page(i64* %x) { + entry: + %c = icmp eq i64* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i64 42 + + not_null: + %y = ptrtoint i64* %x to i64 + %shry = shl i64 %y, 3 + %shry.add = add i64 %shry, 68719472640 + %y.ptr = inttoptr i64 %shry.add to i64* + %x.loc = getelementptr i64, i64* %y.ptr, i64 1 + %t = load i64, i64* %x.loc + ret i64 %t +} + !0 = !{} diff --git a/llvm/test/CodeGen/X86/implicit-null-check.ll b/llvm/test/CodeGen/X86/implicit-null-check.ll index c6241b18f785e..a6566faf99fae 100644 --- a/llvm/test/CodeGen/X86/implicit-null-check.ll +++ b/llvm/test/CodeGen/X86/implicit-null-check.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -O3 -mtriple=x86_64-apple-macosx -enable-implicit-null-checks < %s | FileCheck %s define i32 @imp_null_check_load(i32* %x) { @@ -593,14 +592,12 @@ define i64 @imp_null_check_load_shift_addr(i64* %x) { ; Same as imp_null_check_load_shift_addr but shift is by 3 and this is now ; converted into complex addressing. -; TODO: Can be converted into implicit null check define i64 @imp_null_check_load_shift_by_3_addr(i64* %x) { ; CHECK-LABEL: imp_null_check_load_shift_by_3_addr: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: je LBB22_1 +; CHECK-NEXT: Ltmp18: +; CHECK-NEXT: movq 8(,%rdi,8), %rax ## on-fault: LBB22_1 ; CHECK-NEXT: ## %bb.2: ## %not_null -; CHECK-NEXT: movq 8(,%rdi,8), %rax ; CHECK-NEXT: retq ; CHECK-NEXT: LBB22_1: ## %is_null ; CHECK-NEXT: movl $42, %eax @@ -621,4 +618,31 @@ define i64 @imp_null_check_load_shift_by_3_addr(i64* %x) { %t = load i64, i64* %x.loc ret i64 %t } + +define i64 @imp_null_check_load_shift_add_addr(i64* %x) { +; CHECK-LABEL: imp_null_check_load_shift_add_addr: +; CHECK: ## %bb.0: ## %entry +; CHECK: movq 3526(,%rdi,8), %rax ## on-fault: LBB23_1 +; CHECK-NEXT: ## %bb.2: ## %not_null +; CHECK-NEXT: retq +; CHECK-NEXT: LBB23_1: ## %is_null +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: retq + + entry: + %c = icmp eq i64* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i64 42 + + not_null: + %y = ptrtoint i64* %x to i64 + %shry = shl i64 %y, 3 + %shry.add = add i64 %shry, 3518 + %y.ptr = inttoptr i64 %shry.add to i64* + %x.loc = getelementptr i64, i64* %y.ptr, i64 1 + %t = load i64, i64* %x.loc + ret i64 %t +} !0 = !{} diff --git a/llvm/test/CodeGen/X86/implicit-null-checks.mir b/llvm/test/CodeGen/X86/implicit-null-checks.mir index e1ac01a829730..e66bdea00bc35 100644 --- a/llvm/test/CodeGen/X86/implicit-null-checks.mir +++ b/llvm/test/CodeGen/X86/implicit-null-checks.mir @@ -377,6 +377,22 @@ ret i32 undef } + define i32 @imp_null_check_address_mul_overflow(i32* %x, i32 %a) { + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: ; preds = %entry + ret i32 42 + + not_null: ; preds = %entry + %y = ptrtoint i32* %x to i32 + %y64 = zext i32 %y to i64 + %b = mul i64 %y64, 9223372036854775807 ; 0X0FFFF.. i.e. 2^63 - 1 + %z = trunc i64 %b to i32 + ret i32 %z + } + attributes #0 = { "target-features"="+bmi,+bmi2" } !0 = !{} @@ -1316,3 +1332,32 @@ body: | RETQ $eax ... 
+---
+name: imp_null_check_address_mul_overflow
+# CHECK-LABEL: name: imp_null_check_address_mul_overflow
+# CHECK: bb.0.entry:
+# CHECK-NOT: FAULTING_OP
+alignment: 16
+tracksRegLiveness: true
+liveins:
+  - { reg: '$rdi' }
+  - { reg: '$rsi' }
+body: |
+  bb.0.entry:
+    liveins: $rsi, $rdi
+
+    TEST64rr $rdi, $rdi, implicit-def $eflags
+    JCC_1 %bb.1, 4, implicit $eflags
+
+  bb.2.not_null:
+    liveins: $rdi, $rsi
+
+    $rcx = MOV64ri -9223372036854775808
+    $eax = MOV32rm killed $rdi, 2, $rcx, 0, $noreg, implicit-def $rax
+    RETQ $eax
+
+  bb.1.is_null:
+    $eax = MOV32ri 42
+    RETQ $eax
+
+...

From 80ef4126b100fd3c9823b20ac641fe76c4d5a11f Mon Sep 17 00:00:00 2001
From: Petr Hosek
Date: Wed, 23 Sep 2020 15:53:45 -0700
Subject: [PATCH 282/321] [libcxx] Use runtime rather than compile-time glibc version check

glibc supports versioning, so it's possible to build against an older
version and run against a newer one. This is sometimes relied on in
practice; e.g. in the Fuchsia build we build against an older sysroot
(equivalent to Ubuntu Trusty) to cover the broadest possible range of
host systems, but that doesn't necessarily match the system the binary
is going to run on, which may have a newer version, in which case the
compile-time test used in curr_symbol is going to fail. Using a runtime
check is more reliable.

This is a follow-up to D56702, which addressed one instance; this patch
addresses all of the remaining ones.

Differential Revision: https://reviews.llvm.org/D88188
---
 .../get_long_double_fr_FR.pass.cpp            |  7 +++----
 .../put_long_double_fr_FR.pass.cpp            |  7 +++----
 .../curr_symbol.pass.cpp                      | 14 --------------
 .../decimal_point.pass.cpp                    | 11 ++++-------
 .../thousands_sep.pass.cpp                    | 16 +++++-----------
 .../thousands_sep.pass.cpp                    | 10 +++-------
 libcxx/test/support/platform_support.h        | 14 ++++++++++++++
 7 files changed, 32 insertions(+), 47 deletions(-)

diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp
index 7ec83e27dd4ee..862bc9018cbf9 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp
@@ -54,10 +54,9 @@ class my_facetw
 // this function converts the spaces in string inputs to that character if need
 // be.
static std::wstring convert_thousands_sep(std::wstring const& in) { -#ifndef TEST_GLIBC_PREREQ -#define TEST_GLIBC_PREREQ(x, y) 0 -#endif -#if TEST_GLIBC_PREREQ(2,27) +#if defined(_CS_GNU_LIBC_VERSION) + if (glibc_version_less_than("2.27")) + return in; std::wstring out; unsigned I = 0; bool seen_decimal = false; diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp index 4a800bce02db9..e35bc2573d8cd 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp @@ -54,10 +54,9 @@ class my_facetw // this function converts the spaces in string inputs to that character if need // be. static std::wstring convert_thousands_sep(std::wstring const& in) { -#ifndef TEST_GLIBC_PREREQ -#define TEST_GLIBC_PREREQ(x, y) 0 -#endif -#if TEST_GLIBC_PREREQ(2,27) +#if defined(_CS_GNU_LIBC_VERSION) + if (glibc_version_less_than("2.27")) + return in; std::wstring out; unsigned I = 0; bool seen_num_start = false; diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp index fca8dcd2f40f1..983a3db6a197b 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp @@ -61,20 +61,6 @@ class Fwt : std::moneypunct_byname(nm, refs) {} }; -#if defined(_CS_GNU_LIBC_VERSION) -static bool glibc_version_less_than(char const* version) { - std::string test_version = std::string("glibc ") + version; - - size_t n = confstr(_CS_GNU_LIBC_VERSION, nullptr, (size_t)0); - char *current_version = new char[n]; - confstr(_CS_GNU_LIBC_VERSION, current_version, n); - - bool result = strverscmp(current_version, test_version.c_str()) < 0; - delete[] current_version; - return result; -} -#endif - int main(int, char**) { { diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp index bec52e6abc0dd..783e4ec86da6a 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp @@ -110,15 +110,12 @@ int main(int, char**) } // GLIBC 2.23 uses '.' as the decimal point while other C libraries use ',' // GLIBC 2.27 corrects this -#ifndef TEST_GLIBC_PREREQ -#define TEST_GLIBC_PREREQ(x, y) 0 -#endif -#if !defined(TEST_HAS_GLIBC) || TEST_GLIBC_PREREQ(2, 27) +#if defined(_CS_GNU_LIBC_VERSION) + const char sep = glibc_version_less_than("2.27") ? '.' : ','; + const wchar_t wsep = glibc_version_less_than("2.27") ? L'.' 
: L','; +#else const char sep = ','; const wchar_t wsep = L','; -#else - const char sep = '.'; - const wchar_t wsep = L'.'; #endif { Fnf f(LOCALE_ru_RU_UTF_8, 1); diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp index c789c4e5b1b1e..aa60055a96c2e 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp @@ -103,11 +103,8 @@ int main(int, char**) assert(f.thousands_sep() == ' '); } // The below tests work around GLIBC's use of U202F as mon_thousands_sep. -#ifndef TEST_GLIBC_PREREQ -#define TEST_GLIBC_PREREQ(x, y) 0 -#endif -#if defined(TEST_HAS_GLIBC) && TEST_GLIBC_PREREQ(2, 27) - const wchar_t fr_sep = L'\u202F'; +#if defined(_CS_GNU_LIBC_VERSION) + const wchar_t fr_sep = glibc_version_less_than("2.27") ? L' ' : L'\u202F'; #else const wchar_t fr_sep = L' '; #endif @@ -123,18 +120,15 @@ int main(int, char**) // and U002E as mon_decimal_point. // TODO: Fix thousands_sep for 'char'. // related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006 -#ifndef TEST_HAS_GLIBC +#if defined(_CS_GNU_LIBC_VERSION) const char sep = ' '; - const wchar_t wsep = L' '; -#elif TEST_GLIBC_PREREQ(2, 27) // FIXME libc++ specifically works around \u00A0 by translating it into // a regular space. - const char sep = ' '; - const wchar_t wsep = L'\u202F'; + const wchar_t wsep = glibc_version_less_than("2.27") ? L'\u00A0' : L'\u202F'; #else + const char sep = ' '; // FIXME libc++ specifically works around \u00A0 by translating it into // a regular space. - const char sep = ' '; const wchar_t wsep = L'\u00A0'; #endif { diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp index 2569002402727..b1c814fd84dbc 100644 --- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp @@ -56,14 +56,10 @@ int main(int, char**) } { std::locale l(LOCALE_fr_FR_UTF_8); -#if defined(TEST_HAS_GLIBC) - const char sep = ' '; // The below tests work around GLIBC's use of U202F as LC_NUMERIC thousands_sep. -# if TEST_GLIBC_PREREQ(2, 27) - const wchar_t wsep = L'\u202f'; -# else - const wchar_t wsep = L' '; -# endif +#if defined(_CS_GNU_LIBC_VERSION) + const char sep = ' '; + const wchar_t wsep = glibc_version_less_than("2.27") ? 
L' ' : L'\u202f';
 #else
   const char sep = ',';
   const wchar_t wsep = L',';
diff --git a/libcxx/test/support/platform_support.h b/libcxx/test/support/platform_support.h
index e897ba7625ca6..9290dbd7b76bc 100644
--- a/libcxx/test/support/platform_support.h
+++ b/libcxx/test/support/platform_support.h
@@ -110,4 +110,18 @@ std::wstring get_wide_temp_file_name()
 
 #endif // __CloudABI__
 
+#if defined(_CS_GNU_LIBC_VERSION)
+inline bool glibc_version_less_than(char const* version) {
+  std::string test_version = std::string("glibc ") + version;
+
+  size_t n = confstr(_CS_GNU_LIBC_VERSION, nullptr, (size_t)0);
+  char *current_version = new char[n];
+  confstr(_CS_GNU_LIBC_VERSION, current_version, n);
+
+  bool result = strverscmp(current_version, test_version.c_str()) < 0;
+  delete[] current_version;
+  return result;
+}
+#endif // _CS_GNU_LIBC_VERSION
+
 #endif // PLATFORM_SUPPORT_H

From d9881e6e27bc7fa882742b13d43bb6d491dfc1ea Mon Sep 17 00:00:00 2001
From: Yuanfang Chen
Date: Wed, 7 Oct 2020 18:13:26 -0700
Subject: [PATCH 283/321] [IRMover] Avoid materializing global value that
 belongs to not-yet-linked module

We saw the same assertion failure mentioned here
https://bugs.llvm.org/show_bug.cgi?id=42063 in our internal tests. The
failure happens in the same circumstance as D47898 and D66814, where
uniquing of DICompositeTypes causes `Mapper::mapValue` to be called on
GlobalValues (`G`) from a not-yet-linked module (`M`). The subsequent
type-mapping for `G` may not complete correctly (it may fail to unique
types etc., depending on the complexity of the types) because
IRLinker::computeTypeMapping is not done for `M` in this path.

D47898 and D66814 fixed some type-mapping issues after Mapper::mapValue
is called on `G`. However, they did not handle some complex cases. I
think we should delay linking globals like `G` until their owning
module is linked. In this way, we save unnecessary type mapping and
prune these corner cases. It should also reduce the total number of
structs ending up in the combined module.

D47898 is reverted (its test is kept) because it regresses the test
case here. D66814 could also be reverted (`check-all` still looks
good), but it looks reasonable anyway, so I kept it.

Also tested the patch with a clang self-host regular LTO/ThinLTO build;
things look good as well.

Reviewed By: tejohnson

Differential Revision: https://reviews.llvm.org/D87001
---
 llvm/lib/Linker/IRMover.cpp                   | 18 ++---
 .../LTO/X86/Inputs/type-mapping-bug4_0.ll     | 11 +++
 .../LTO/X86/Inputs/type-mapping-bug4_1.ll     | 55 +++++++++++++
 llvm/test/LTO/X86/type-mapping-bug4.ll        | 77 +++++++++++++++++++
 4 files changed, 151 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/LTO/X86/Inputs/type-mapping-bug4_0.ll
 create mode 100644 llvm/test/LTO/X86/Inputs/type-mapping-bug4_1.ll
 create mode 100644 llvm/test/LTO/X86/type-mapping-bug4.ll

diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 186ddb3d2b81b..cb4146a05fdf5 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -242,15 +242,6 @@ Type *TypeMapTy::get(Type *Ty, SmallPtrSet<StructType *, 8> &Visited) {
   bool IsUniqued = !isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral();
 
   if (!IsUniqued) {
-    StructType *STy = cast<StructType>(Ty);
-    // This is actually a type from the destination module, this can be reached
-    // when this type is loaded in another module, added to DstStructTypesSet,
-    // and then we reach the same type in another module where it has not been
-    // added to MappedTypes.
(PR37684) - if (STy->getContext().isODRUniquingDebugTypes() && !STy->isOpaque() && - DstStructTypesSet.hasType(STy)) - return *Entry = STy; - #ifndef NDEBUG for (auto &Pair : MappedTypes) { assert(!(Pair.first != Ty && Pair.second == Ty) && @@ -258,7 +249,7 @@ Type *TypeMapTy::get(Type *Ty, SmallPtrSet &Visited) { } #endif - if (!Visited.insert(STy).second) { + if (!Visited.insert(cast(Ty)).second) { StructType *DTy = StructType::create(Ty->getContext()); return *Entry = DTy; } @@ -579,6 +570,13 @@ Value *IRLinker::materialize(Value *V, bool ForIndirectSymbol) { if (!SGV) return nullptr; + // When linking a global from other modules than source & dest, skip + // materializing it because it would be mapped later when its containing + // module is linked. Linking it now would potentially pull in many types that + // may not be mapped properly. + if (SGV->getParent() != &DstM && SGV->getParent() != SrcM.get()) + return nullptr; + Expected NewProto = linkGlobalValueProto(SGV, ForIndirectSymbol); if (!NewProto) { setError(NewProto.takeError()); diff --git a/llvm/test/LTO/X86/Inputs/type-mapping-bug4_0.ll b/llvm/test/LTO/X86/Inputs/type-mapping-bug4_0.ll new file mode 100644 index 0000000000000..9dc490ed1a1e1 --- /dev/null +++ b/llvm/test/LTO/X86/Inputs/type-mapping-bug4_0.ll @@ -0,0 +1,11 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.CWBD = type { float } +%"class.std::_Unique_ptr_base" = type { %class.CWBD* } + +%class.CB = type opaque + +!llvm.module.flags = !{!0, !1} +!0 = !{i32 1, !"ThinLTO", i32 0} +!1 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/LTO/X86/Inputs/type-mapping-bug4_1.ll b/llvm/test/LTO/X86/Inputs/type-mapping-bug4_1.ll new file mode 100644 index 0000000000000..54fd9d1b4bff3 --- /dev/null +++ b/llvm/test/LTO/X86/Inputs/type-mapping-bug4_1.ll @@ -0,0 +1,55 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.CCSM = type opaque +%class.CWBD = type { float } + +%"class.std::_Unique_ptr_base" = type { %class.CWBD* } + +%class.CB = type { %"class.std::unique_ptr_base.1" } +; (stage1.1) +; %class.std::unique_ptr_base.1(t1.o) is mapped to %class.std::unique_ptr_base(t0.o) +; %class.CCSM(t1.o) is mapped to %class.CWBD(t0.o) +%"class.std::unique_ptr_base.1" = type { %class.CCSM* } + +; (stage1.2) +; %class.CCSM(t1.o) -> %class.CWBD(t0.o) mapping of stage1.1 maps this to +; "declare void @h(%class.CWBD*)" +declare void @h(%class.CCSM*) +define void @j() { + call void @h(%class.CCSM* undef) + ret void +} + +define void @a() { + ; Without the fix in D87001 to delay materialization of @d until its module is linked + ; (stage1.3) + ; mapping `%class.CB* undef` creates the first instance of %class.CB (%class.CB). + ; (stage2) + ; mapping `!6` starts the stage2, during which second instance of %class.CB (%class.CB.1) + ; is created for the mapped @d declaration. + ; define void @d(%class.CB.1*) + ; After this, %class.CB (t2.o) (aka %class.CB.1) and + ; %"class.std::unique_ptr_base.2" (t2.o) are added to DstStructTypesSet. 
+ call void @llvm.dbg.value(metadata %class.CB* undef, metadata !6, metadata !DIExpression()), !dbg !4 + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} +!0 = !{i32 1, !"ThinLTO", i32 0} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3) +!3 = !DIFile(filename: "f2", directory: "") + +!4 = !DILocation(line: 117, column: 34, scope: !7) + +; This DICompositeType refers to !5 in type-mapping-bug4.ll +!5 = !DICompositeType(tag: DW_TAG_structure_type, flags: DIFlagFwdDecl, identifier: "SHARED") + +!6 = !DILocalVariable(name: "this", arg: 1, scope: !7, flags: DIFlagArtificial | DIFlagObjectPointer) +!7 = distinct !DISubprogram(name: "a", type: !8, unit: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !5} diff --git a/llvm/test/LTO/X86/type-mapping-bug4.ll b/llvm/test/LTO/X86/type-mapping-bug4.ll new file mode 100644 index 0000000000000..bdd687cfbefdc --- /dev/null +++ b/llvm/test/LTO/X86/type-mapping-bug4.ll @@ -0,0 +1,77 @@ +; RUN: opt -module-summary -o %t0.o %S/Inputs/type-mapping-bug4_0.ll +; RUN: opt -module-summary -o %t1.o %S/Inputs/type-mapping-bug4_1.ll +; RUN: opt -module-summary -o %t2.o %s +; RUN: llvm-lto2 run -save-temps -o %t3 %t0.o %t1.o %t2.o -r %t1.o,a,px -r %t2.o,d,px -r %t1.o,h,x -r %t2.o,h,x -r %t1.o,j,px +; RUN: llvm-dis < %t3.0.0.preopt.bc | FileCheck %s + +; stage0: linking t0.o +; stage1: linking t1.o +; stage2: during linking t1.o, mapping @d +; stage3: linking t2.o + +; Stage0 is not described because it is not interesting for the purpose of this test. +; Stage1 and stage2 are described in type-mapping-bug4_1.ll. +; Stage3 is described in this file. + +; CHECK: %class.CCSM = type opaque +; CHECK: %class.CB = type { %"class.std::unique_ptr_base.1" } +; CHECK: %"class.std::unique_ptr_base.1" = type { %class.CCSM* } + +; CHECK: define void @j() { +; CHECK: call void @h(%class.CCSM* undef) +; CHECK: ret void +; CHECK: } + +; CHECK: declare void @h(%class.CCSM*) + +; CHECK: define void @a() { +; CHECK: call void @llvm.dbg.value(metadata %class.CB* undef, metadata !10, metadata !DIExpression()) +; CHECK: ret void +; CHECK: } + +; CHECK: declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + +; CHECK: define void @d(%class.CB* %0) { +; CHECK: %2 = getelementptr inbounds %class.CB, %class.CB* undef, i64 0, i32 0, i32 0 +; CHECK: ret void +; CHECK: } + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; (stage3) Remapping this type returns itself due to D47898 and stage1.3 +%class.CB = type { %"class.std::unique_ptr_base.2" } + +; (stage3) Remapping this type returns itself due to D47898 and stage2 +%"class.std::unique_ptr_base.2" = type { %class.CCSM* } + +%class.CCSM = type opaque + +; (stage3) computeTypeMapping add the mapping %class.CCSM -> %class.CWBD due to stage1.2 +declare void @h(%class.CCSM*) + +define void @d(%class.CB*) { + ; Without the fix in D87001 to delay materialization of @d until its module is linked + ; (stage3) + ; * SourceElementType of getelementptr is remapped to itself. + ; * ResultElementType of getelementptr is incorrectly remapped to %class.CWBD*. + ; Its type should be %class.CCSM*. 
+ %2 = getelementptr inbounds %class.CB, %class.CB* undef, i64 0, i32 0, i32 0 + ret void +} + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!0, !1} +!0 = !{i32 1, !"ThinLTO", i32 0} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, retainedTypes: !4) +!3 = !DIFile(filename: "f1", directory: "") +!4 = !{!5} + +; This DICompositeType is referenced by !5 in Inputs/type-mapping-bug4_1.ll +; causing the function type in !7 to be added to its module. +!5 = !DICompositeType(tag: DW_TAG_structure_type, templateParams: !6, identifier: "SHARED") +!6 = !{!7} + +; The reference to d and %class.CB that gets loaded into %t1.o +!7 = !DITemplateValueParameter(value: void (%class.CB*)* @d) From 93db4a8ce6261dc36e233f5e0b60cfbb3ea1bd8f Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Wed, 7 Oct 2020 18:24:24 -0700 Subject: [PATCH 284/321] Remove unused variables These are unused since https://reviews.llvm.org/rG35cb45c533fb76dcfc9f44b4e8bbd5d8a855ed2a causing `-Wunused` warnings. Differential Revision: https://reviews.llvm.org/D89022 --- llvm/lib/CodeGen/ImplicitNullChecks.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 20666f27aab9e..d0a4511e90e72 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -369,10 +369,6 @@ ImplicitNullChecks::SuitabilityResult ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, unsigned PointerReg, ArrayRef PrevInsts) { - int64_t Offset; - bool OffsetIsScalable; - const MachineOperand *BaseOp; - // Implementation restriction for faulting_op insertion // TODO: This could be relaxed if we find a test case which warrants it. if (MI.getDesc().getNumDefs() > 1) From dc3dba7dbdab207a8e55171860d3caa712aa5dfc Mon Sep 17 00:00:00 2001 From: Scott Constable Date: Wed, 7 Oct 2020 18:30:37 -0700 Subject: [PATCH 285/321] [X86] Move findDeadCallerSavedReg() into X86RegisterInfo The findDeadCallerSavedReg() function has utility outside of X86FrameLowering.cpp Differential Revision: https://reviews.llvm.org/D88924 --- llvm/lib/Target/X86/X86FrameLowering.cpp | 58 +----------------------- llvm/lib/Target/X86/X86RegisterInfo.cpp | 50 ++++++++++++++++++++ llvm/lib/Target/X86/X86RegisterInfo.h | 6 +++ 3 files changed, 58 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 6d196a6228373..e92384ce63857 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -148,60 +148,6 @@ static unsigned getLEArOpcode(bool IsLP64) { return IsLP64 ? X86::LEA64r : X86::LEA32r; } -/// findDeadCallerSavedReg - Return a caller-saved register that isn't live -/// when it reaches the "return" instruction. We can then pop a stack object -/// to this register without worry about clobbering it. 
-static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const X86RegisterInfo *TRI, - bool Is64Bit) { - const MachineFunction *MF = MBB.getParent(); - if (MF->callsEHReturn()) - return 0; - - const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF); - - if (MBBI == MBB.end()) - return 0; - - switch (MBBI->getOpcode()) { - default: return 0; - case TargetOpcode::PATCHABLE_RET: - case X86::RET: - case X86::RETL: - case X86::RETQ: - case X86::RETIL: - case X86::RETIQ: - case X86::TCRETURNdi: - case X86::TCRETURNri: - case X86::TCRETURNmi: - case X86::TCRETURNdi64: - case X86::TCRETURNri64: - case X86::TCRETURNmi64: - case X86::EH_RETURN: - case X86::EH_RETURN64: { - SmallSet Uses; - for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MBBI->getOperand(i); - if (!MO.isReg() || MO.isDef()) - continue; - Register Reg = MO.getReg(); - if (!Reg) - continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); - } - - for (auto CS : AvailableRegs) - if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && - CS != X86::ESP) - return CS; - } - } - - return 0; -} - static bool isEAXLiveIn(MachineBasicBlock &MBB) { for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) { unsigned Reg = RegMask.PhysReg; @@ -288,7 +234,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, if (isSub && !isEAXLiveIn(MBB)) Reg = Rax; else - Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); + Reg = TRI->findDeadCallerSavedReg(MBB, MBBI); unsigned MovRIOpc = Is64Bit ? X86::MOV64ri : X86::MOV32ri; unsigned AddSubRROpc = @@ -345,7 +291,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, // need to find a dead register when using pop. unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) - : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); + : TRI->findDeadCallerSavedReg(MBB, MBBI); if (Reg) { unsigned Opc = isSub ? (Is64Bit ? 
X86::PUSH64r : X86::PUSH32r) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index f456728cf47b8..2636cbd27469e 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -18,6 +18,7 @@ #include "X86Subtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -790,6 +791,55 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } +unsigned X86RegisterInfo::findDeadCallerSavedReg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const { + const MachineFunction *MF = MBB.getParent(); + if (MF->callsEHReturn()) + return 0; + + const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF); + + if (MBBI == MBB.end()) + return 0; + + switch (MBBI->getOpcode()) { + default: + return 0; + case TargetOpcode::PATCHABLE_RET: + case X86::RET: + case X86::RETL: + case X86::RETQ: + case X86::RETIL: + case X86::RETIQ: + case X86::TCRETURNdi: + case X86::TCRETURNri: + case X86::TCRETURNmi: + case X86::TCRETURNdi64: + case X86::TCRETURNri64: + case X86::TCRETURNmi64: + case X86::EH_RETURN: + case X86::EH_RETURN64: { + SmallSet Uses; + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { + MachineOperand &MO = MBBI->getOperand(I); + if (!MO.isReg() || MO.isDef()) + continue; + Register Reg = MO.getReg(); + if (!Reg) + continue; + for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI) + Uses.insert(*AI); + } + + for (auto CS : AvailableRegs) + if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP) + return CS; + } + } + + return 0; +} + Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const X86FrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? FramePtr : StackPtr; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h index 3435c0a10b047..56cb8909c5203 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -128,6 +128,12 @@ class X86RegisterInfo final : public X86GenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; + /// findDeadCallerSavedReg - Return a caller-saved register that isn't live + /// when it reaches the "return" instruction. We can then pop a stack object + /// to this register without worry about clobbering it. + unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI) const; + // Debug information queries. Register getFrameRegister(const MachineFunction &MF) const override; unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const; From ad4313fc833d2a1b5ae4b3ffe92366ba61a1232c Mon Sep 17 00:00:00 2001 From: Scott Constable Date: Wed, 7 Oct 2020 18:32:45 -0700 Subject: [PATCH 286/321] [X86] Fix bug in -mlvi-cfi that may clobber a live register Fix for this bug: https://bugs.llvm.org/show_bug.cgi?id=47740 The fix uses the existing findDeadCallerSavedReg() function instead of a hacky heuristic to find a scratch register to clobber. 
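
As an illustration of why a fixed register choice is unsafe, consider
the { i64, i128 } return in the ret_i64_i128 test added below: the
whole aggregate comes back in general-purpose registers, so each of
those registers is live at the return and must not be clobbered. The
following toy model is illustrative only; the enum, its ordering and
the helper name are assumptions for the sketch, not the real
MachineInstr/TargetRegisterInfo API. It shows the shape of the
dead-register scan:

  #include <array>
  #include <cstdint>
  #include <optional>

  // Toy sketch of findDeadCallerSavedReg(): pick the first candidate
  // GPR that the return sequence does not read. When the return value
  // occupies several registers (as in ret_i64_i128, where the test
  // expects `popq %rsi`), the scan skips past all of them.
  enum Reg : uint8_t { RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, kNumRegs };

  std::optional<Reg> findDeadCallerSaved(
      const std::array<bool, kNumRegs> &UsedAtReturn) {
    for (uint8_t R = 0; R != kNumRegs; ++R)
      if (!UsedAtReturn[R])
        return static_cast<Reg>(R);
    return std::nullopt; // no dead register; fall back to the RSP probe
  }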
Differential Revision: https://reviews.llvm.org/D88925 --- .../X86/X86LoadValueInjectionRetHardening.cpp | 81 +++++++------------ llvm/test/CodeGen/X86/lvi-hardening-ret.ll | 21 +++-- 2 files changed, 44 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp index 6e1134a259501..7b6276c1d87e0 100644 --- a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp +++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp @@ -72,62 +72,39 @@ bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction( ++NumFunctionsConsidered; const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); const X86InstrInfo *TII = Subtarget->getInstrInfo(); - unsigned ClobberReg = X86::NoRegister; - std::bitset UnclobberableGR64s; - UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer - UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer - UnclobberableGR64s.set(X86::RAX); // used for function return - UnclobberableGR64s.set(X86::RDX); // used for function return - - // We can clobber any register allowed by the function's calling convention. - for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR) - UnclobberableGR64s.set(Reg); - for (auto &Reg : X86::GR64RegClass) { - if (!UnclobberableGR64s.test(Reg)) { - ClobberReg = Reg; - break; - } - } - - if (ClobberReg != X86::NoRegister) { - LLVM_DEBUG(dbgs() << "Selected register " - << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg) - << " to clobber\n"); - } else { - LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n"); - } bool Modified = false; for (auto &MBB : MF) { - if (MBB.empty()) - continue; - - MachineInstr &MI = MBB.back(); - if (MI.getOpcode() != X86::RETQ) - continue; - - if (ClobberReg != X86::NoRegister) { - MBB.erase_instr(&MI); - BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r)) - .addReg(ClobberReg, RegState::Define) - .setMIFlag(MachineInstr::FrameDestroy); - BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE)); - BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r)) - .addReg(ClobberReg); - } else { - // In case there is no available scratch register, we can still read from - // RSP to assert that RSP points to a valid page. The write to RSP is - // also helpful because it verifies that the stack's write permissions - // are intact. - MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE)); - addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)), - X86::RSP, false, 0) - .addImm(0) - ->addRegisterDead(X86::EFLAGS, TRI); + for (auto MBBI = MBB.begin(); MBBI != MBB.end(); ++MBBI) { + if (MBBI->getOpcode() != X86::RETQ) + continue; + + unsigned ClobberReg = TRI->findDeadCallerSavedReg(MBB, MBBI); + if (ClobberReg != X86::NoRegister) { + BuildMI(MBB, MBBI, DebugLoc(), TII->get(X86::POP64r)) + .addReg(ClobberReg, RegState::Define) + .setMIFlag(MachineInstr::FrameDestroy); + BuildMI(MBB, MBBI, DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(MBB, MBBI, DebugLoc(), TII->get(X86::JMP64r)) + .addReg(ClobberReg); + MBB.erase(MBBI); + } else { + // In case there is no available scratch register, we can still read + // from RSP to assert that RSP points to a valid page. The write to RSP + // is also helpful because it verifies that the stack's write + // permissions are intact. 
+        MachineInstr *Fence =
+            BuildMI(MBB, MBBI, DebugLoc(), TII->get(X86::LFENCE));
+        addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
+                     X86::RSP, false, 0)
+            .addImm(0)
+            ->addRegisterDead(X86::EFLAGS, TRI);
+      }
+
+      ++NumFences;
+      Modified = true;
+      break;
     }
-
-    ++NumFences;
-    Modified = true;
   }
 
   if (Modified)
diff --git a/llvm/test/CodeGen/X86/lvi-hardening-ret.ll b/llvm/test/CodeGen/X86/lvi-hardening-ret.ll
index 9f2b028b30344..0ff702dc7df97 100644
--- a/llvm/test/CodeGen/X86/lvi-hardening-ret.ll
+++ b/llvm/test/CodeGen/X86/lvi-hardening-ret.ll
@@ -41,9 +41,9 @@ entry:
   %add = add nsw i32 %0, %1
   ret i32 %add
 ; CHECK-NOT: retq
-; CHECK: shlq $0, (%{{[^ ]*}})
+; CHECK: popq %rcx
 ; CHECK-NEXT: lfence
-; CHECK-NEXT: retq
+; CHECK-NEXT: jmpq *%rcx
 }
 
 ; Function Attrs: noinline nounwind optnone uwtable
@@ -52,9 +52,9 @@ define dso_local preserve_mostcc void @preserve_most() #0 {
 entry:
   ret void
 ; CHECK-NOT: retq
-; CHECK: popq %r11
+; CHECK: popq %rax
 ; CHECK-NEXT: lfence
-; CHECK-NEXT: jmpq *%r11
+; CHECK-NEXT: jmpq *%rax
 }
 
 ; Function Attrs: noinline nounwind optnone uwtable
@@ -63,9 +63,18 @@ define dso_local preserve_allcc void @preserve_all() #0 {
 entry:
   ret void
 ; CHECK-NOT: retq
-; CHECK: popq %r11
+; CHECK: popq %rax
 ; CHECK-NEXT: lfence
-; CHECK-NEXT: jmpq *%r11
+; CHECK-NEXT: jmpq *%rax
+}
+
+define { i64, i128 } @ret_i64_i128() #0 {
+; CHECK-LABEL: ret_i64_i128:
+  ret { i64, i128 } { i64 1, i128 36893488147419103235 }
+; CHECK-NOT: retq
+; CHECK: popq %rsi
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jmpq *%rsi
 }
 
 attributes #0 = { "target-features"="+lvi-cfi" }

From 31611721686760fe59c91a84b025e4dee94d1662 Mon Sep 17 00:00:00 2001
From: Yonghong Song
Date: Wed, 7 Oct 2020 16:24:33 -0700
Subject: [PATCH 287/321] BPF: fix incorrect DAG2DAG load optimization

Currently, the BPF backend's instruction selection DAG2DAG phase has an
optimization that replaces loads of constant struct members or array
elements with the values themselves. The reason is that these locally
defined struct or array variables may have their initial values stored
in a readonly section, and the early BPF ecosystem was not able to
handle such cases.

The BPF ecosystem can now handle not only readonly sections but also
global variables. A global variable can also have initialized data, and
it may or may not be constant, i.e., its data can be put in either the
.data section or the .rodata section. This exposed a bug in the DAG2DAG
load optimization, as it did not check whether the global variable is
constant or not. This patch fixes the bug by checking whether the
global variable representing the initial data is constant, and by not
doing the optimization if it is not.

Another bug is also fixed in this patch: we now check whether the load
is simple (not volatile/atomic) and do not do the optimization if it is
not.

To summarize, for globals:
- struct t var = { ... }; // no load optimization
- const struct t var = { ... }; // load optimization is possible
- volatile const struct t var = { ...
}; // no load optimization Differential Revision: https://reviews.llvm.org/D89021 --- llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp | 4 ++-- llvm/test/CodeGen/BPF/rodata_6.ll | 25 +++++++++++++++++++++++++ llvm/test/CodeGen/BPF/rodata_7.ll | 25 +++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/BPF/rodata_6.ll create mode 100644 llvm/test/CodeGen/BPF/rodata_7.ll diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp index d407edfbd9660..77f565fb5957a 100644 --- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -254,7 +254,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, const LoadSDNode *LD = cast(Node); uint64_t size = LD->getMemOperand()->getSize(); - if (!size || size > 8 || (size & (size - 1))) + if (!size || size > 8 || (size & (size - 1)) || !LD->isSimple()) return; SDNode *LDAddrNode = LD->getOperand(1).getNode(); @@ -342,7 +342,7 @@ bool BPFDAGToDAGISel::getConstantFieldValue(const GlobalAddressSDNode *Node, unsigned char *ByteSeq) { const GlobalVariable *V = dyn_cast(Node->getGlobal()); - if (!V || !V->hasInitializer()) + if (!V || !V->hasInitializer() || !V->isConstant()) return false; const Constant *Init = V->getInitializer(); diff --git a/llvm/test/CodeGen/BPF/rodata_6.ll b/llvm/test/CodeGen/BPF/rodata_6.ll new file mode 100644 index 0000000000000..1af3d8dc230fe --- /dev/null +++ b/llvm/test/CodeGen/BPF/rodata_6.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=bpf < %s | FileCheck %s +; +; Source code: +; struct t1 { int a; }; +; struct t1 data = { .a = 3 }; +; int foo(void) { +; return data.a + 20; +; } +; Compilation flag: +; clang -target bpf -O2 -S -emit-llvm test.c + +%struct.t1 = type { i32 } + +@data = dso_local local_unnamed_addr global %struct.t1 { i32 3 }, align 4 + +; Function Attrs: norecurse nounwind readonly +define dso_local i32 @foo() local_unnamed_addr { +entry: + %0 = load i32, i32* getelementptr inbounds (%struct.t1, %struct.t1* @data, i64 0, i32 0), align 4 + %add = add nsw i32 %0, 20 +; CHECK: [[REG1:r[0-9]+]] = data ll +; CHECK: r0 = *(u32 *)([[REG1]] + 0) +; CHECK: r0 += 20 + ret i32 %add +} diff --git a/llvm/test/CodeGen/BPF/rodata_7.ll b/llvm/test/CodeGen/BPF/rodata_7.ll new file mode 100644 index 0000000000000..69969a1403026 --- /dev/null +++ b/llvm/test/CodeGen/BPF/rodata_7.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=bpf < %s | FileCheck %s +; +; Source code: +; struct t1 { int a; }; +; volatile const struct t1 data = { .a = 3 }; +; int foo(void) { +; return data.a + 20; +; } +; Compilation flag: +; clang -target bpf -O2 -S -emit-llvm test.c + +%struct.t1 = type { i32 } + +@data = dso_local constant %struct.t1 { i32 3 }, align 4 + +; Function Attrs: nofree norecurse nounwind +define dso_local i32 @foo() local_unnamed_addr { +entry: + %0 = load volatile i32, i32* getelementptr inbounds (%struct.t1, %struct.t1* @data, i64 0, i32 0), align 4 + %add = add nsw i32 %0, 20 +; CHECK: [[REG1:r[0-9]+]] = data ll +; CHECK: r0 = *(u32 *)([[REG1]] + 0) +; CHECK: r0 += 20 + ret i32 %add +} From a4961f0d8a2ec91e209b9d3ea7b03acac4f5a8b1 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 7 Oct 2020 22:23:08 -0400 Subject: [PATCH 288/321] Revert "[Support][unittests] Enforce alignment in ConvertUTFTest" This reverts commit 53b3873cf428fd78f1d92504cc20adf11181ead7. Seems to break SupportTests.exe's ConvertUTFTest.UTF16WrappersForConvertUTF16ToUTF8String on Windows. 
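
For background, the hazard the reverted assert was checking for can be
avoided without any alignment precondition. Here is a minimal sketch,
assuming nothing about the LLVM sources beyond the problem statement: a
byte buffer handed to the converter has no inherent 2-byte alignment,
and reinterpret_cast'ing it to UTF16* is only valid when the bytes
happen to be suitably aligned, whereas copying through memcpy is
well-defined for any source alignment.

  #include <cstdint>
  #include <cstring>

  // Minimal sketch: read one UTF-16 code unit from a possibly
  // misaligned byte buffer. memcpy into an aligned local is
  // well-defined regardless of the buffer's alignment, and compilers
  // typically lower it to a single (unaligned) load.
  inline uint16_t readCodeUnit(const char *Bytes) {
    uint16_t Unit;
    std::memcpy(&Unit, Bytes, sizeof(Unit));
    return Unit;
  }

A helper along these lines would sidestep the alignment question
entirely instead of asserting on it.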
--- llvm/lib/Support/ConvertUTFWrapper.cpp | 2 -- llvm/unittests/Support/ConvertUTFTest.cpp | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp index d8d46712a5935..6ec567882ea6b 100644 --- a/llvm/lib/Support/ConvertUTFWrapper.cpp +++ b/llvm/lib/Support/ConvertUTFWrapper.cpp @@ -97,8 +97,6 @@ bool convertUTF16ToUTF8String(ArrayRef SrcBytes, std::string &Out) { const UTF16 *Src = reinterpret_cast(SrcBytes.begin()); const UTF16 *SrcEnd = reinterpret_cast(SrcBytes.end()); - assert((uintptr_t)Src % sizeof(UTF16) == 0); - // Byteswap if necessary. std::vector ByteSwapped; if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) { diff --git a/llvm/unittests/Support/ConvertUTFTest.cpp b/llvm/unittests/Support/ConvertUTFTest.cpp index b689e688f720e..83019722332d3 100644 --- a/llvm/unittests/Support/ConvertUTFTest.cpp +++ b/llvm/unittests/Support/ConvertUTFTest.cpp @@ -16,7 +16,7 @@ using namespace llvm; TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) { // Src is the look of disapproval. - alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c"; + static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c"; ArrayRef Ref(Src, sizeof(Src) - 1); std::string Result; bool Success = convertUTF16ToUTF8String(Ref, Result); @@ -27,7 +27,7 @@ TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) { TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) { // Src is the look of disapproval. - alignas(UTF16) static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0"; + static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0"; ArrayRef Ref(Src, sizeof(Src) - 1); std::string Result; bool Success = convertUTF16ToUTF8String(Ref, Result); From 6dcbea877bffcd5871cf6002616a5dcdfece0b86 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 22 Sep 2020 09:34:46 -0700 Subject: [PATCH 289/321] [NewPM] Use PassInstrumentation for -verify-each This removes "VerifyEachPass" parameters from a lot of functions which is nice. Don't verify after special passes or VerifierPass. This introduces verification on loop and cgscc passes, verifying the corresponding function/module. Reviewed By: ychen Differential Revision: https://reviews.llvm.org/D88764 --- llvm/include/llvm/Passes/PassBuilder.h | 24 ++--- .../llvm/Passes/StandardInstrumentations.h | 14 ++- llvm/lib/Passes/PassBuilder.cpp | 102 ++++++++---------- llvm/lib/Passes/StandardInstrumentations.cpp | 45 +++++++- .../Other/new-pass-manager-verify-each.ll | 39 +++++++ llvm/test/Other/new-pass-manager.ll | 13 --- .../tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp | 4 +- llvm/tools/opt/NewPMDriver.cpp | 62 +++++------ .../unittests/IR/PassBuilderCallbacksTest.cpp | 78 +++++++------- polly/lib/Support/RegisterPasses.cpp | 6 +- 10 files changed, 217 insertions(+), 170 deletions(-) create mode 100644 llvm/test/Other/new-pass-manager-verify-each.ll diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 73ee6092d7472..aef9d9b6bb4cc 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -488,7 +488,6 @@ class PassBuilder { /// preferred when a pipeline is largely of one type, but one or just a few /// passes are of different types(See PassBuilder.cpp for examples). 
Error parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText, - bool VerifyEachPass = true, bool DebugLogging = false); /// {{@ Parse a textual pass pipeline description into a specific PassManager @@ -499,13 +498,10 @@ class PassBuilder { /// /// function(lpass) Error parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText, - bool VerifyEachPass = true, bool DebugLogging = false); Error parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText, - bool VerifyEachPass = true, bool DebugLogging = false); Error parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText, - bool VerifyEachPass = true, bool DebugLogging = false); /// @}} @@ -682,7 +678,7 @@ class PassBuilder { /// PassManagers and populate the passed ModulePassManager. void registerParseTopLevelPipelineCallback( const std::function, - bool VerifyEachPass, bool DebugLogging)> &C); + bool DebugLogging)> &C); /// Add PGOInstrumenation passes for O0 only. void addPGOInstrPassesForO0(ModulePassManager &MPM, bool DebugLogging, @@ -706,27 +702,27 @@ class PassBuilder { parsePipelineText(StringRef Text); Error parseModulePass(ModulePassManager &MPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging); + bool DebugLogging); Error parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging); + bool DebugLogging); Error parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging); + bool DebugLogging); Error parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging); + bool DebugLogging); bool parseAAPassName(AAManager &AA, StringRef Name); Error parseLoopPassPipeline(LoopPassManager &LPM, ArrayRef Pipeline, - bool VerifyEachPass, bool DebugLogging); + bool DebugLogging); Error parseFunctionPassPipeline(FunctionPassManager &FPM, ArrayRef Pipeline, - bool VerifyEachPass, bool DebugLogging); + bool DebugLogging); Error parseCGSCCPassPipeline(CGSCCPassManager &CGPM, ArrayRef Pipeline, - bool VerifyEachPass, bool DebugLogging); + bool DebugLogging); Error parseModulePassPipeline(ModulePassManager &MPM, ArrayRef Pipeline, - bool VerifyEachPass, bool DebugLogging); + bool DebugLogging); void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, OptimizationLevel Level, bool RunProfileGen, bool IsCS, @@ -759,7 +755,7 @@ class PassBuilder { 2> ModulePipelineParsingCallbacks; SmallVector, - bool VerifyEachPass, bool DebugLogging)>, + bool DebugLogging)>, 2> TopLevelPipelineParsingCallbacks; // CGSCC callbacks diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 9d03aeb6cec46..f7067c88d28e9 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -213,6 +213,14 @@ class IRChangePrinter : public ChangePrinter { raw_ostream &Out; }; +class VerifyInstrumentation { + bool DebugLogging; + +public: + VerifyInstrumentation(bool DebugLogging) : DebugLogging(DebugLogging) {} + void registerCallbacks(PassInstrumentationCallbacks &PIC); +}; + /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). 
class StandardInstrumentations { @@ -222,9 +230,13 @@ class StandardInstrumentations { OptNoneInstrumentation OptNone; PreservedCFGCheckerInstrumentation PreservedCFGChecker; IRChangePrinter PrintChangedIR; + VerifyInstrumentation Verify; + + bool VerifyEach; public: - StandardInstrumentations(bool DebugLogging) : PrintPass(DebugLogging) {} + StandardInstrumentations(bool DebugLogging, bool VerifyEach = false) + : PrintPass(DebugLogging), Verify(DebugLogging), VerifyEach(VerifyEach) {} void registerCallbacks(PassInstrumentationCallbacks &PIC); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 92724ed22d02d..b786e3dab9379 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -2133,7 +2133,7 @@ PassBuilder::parsePipelineText(StringRef Text) { Error PassBuilder::parseModulePass(ModulePassManager &MPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging) { + bool DebugLogging) { auto &Name = E.Name; auto &InnerPipeline = E.InnerPipeline; @@ -2141,32 +2141,31 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, if (!InnerPipeline.empty()) { if (Name == "module") { ModulePassManager NestedMPM(DebugLogging); - if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseModulePassPipeline(NestedMPM, InnerPipeline, DebugLogging)) return Err; MPM.addPass(std::move(NestedMPM)); return Error::success(); } if (Name == "cgscc") { CGSCCPassManager CGPM(DebugLogging); - if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline, VerifyEachPass, - DebugLogging)) + if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline, DebugLogging)) return Err; MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); return Error::success(); } if (Name == "function") { FunctionPassManager FPM(DebugLogging); - if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseFunctionPassPipeline(FPM, InnerPipeline, DebugLogging)) return Err; MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); return Error::success(); } if (auto Count = parseRepeatPassName(Name)) { ModulePassManager NestedMPM(DebugLogging); - if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseModulePassPipeline(NestedMPM, InnerPipeline, DebugLogging)) return Err; MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM))); return Error::success(); @@ -2315,8 +2314,7 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, } Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, - const PipelineElement &E, bool VerifyEachPass, - bool DebugLogging) { + const PipelineElement &E, bool DebugLogging) { auto &Name = E.Name; auto &InnerPipeline = E.InnerPipeline; @@ -2324,8 +2322,8 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, if (!InnerPipeline.empty()) { if (Name == "cgscc") { CGSCCPassManager NestedCGPM(DebugLogging); - if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, DebugLogging)) return Err; // Add the nested pass manager with the appropriate adaptor. 
CGPM.addPass(std::move(NestedCGPM)); @@ -2333,8 +2331,8 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, } if (Name == "function") { FunctionPassManager FPM(DebugLogging); - if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseFunctionPassPipeline(FPM, InnerPipeline, DebugLogging)) return Err; // Add the nested pass manager with the appropriate adaptor. CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); @@ -2342,16 +2340,16 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, } if (auto Count = parseRepeatPassName(Name)) { CGSCCPassManager NestedCGPM(DebugLogging); - if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, DebugLogging)) return Err; CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM))); return Error::success(); } if (auto MaxRepetitions = parseDevirtPassName(Name)) { CGSCCPassManager NestedCGPM(DebugLogging); - if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, DebugLogging)) return Err; CGPM.addPass( createDevirtSCCRepeatedPass(std::move(NestedCGPM), *MaxRepetitions)); @@ -2429,7 +2427,7 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging) { + bool DebugLogging) { auto &Name = E.Name; auto &InnerPipeline = E.InnerPipeline; @@ -2437,8 +2435,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, if (!InnerPipeline.empty()) { if (Name == "function") { FunctionPassManager NestedFPM(DebugLogging); - if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseFunctionPassPipeline(NestedFPM, InnerPipeline, DebugLogging)) return Err; // Add the nested pass manager with the appropriate adaptor. FPM.addPass(std::move(NestedFPM)); @@ -2446,8 +2444,7 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, } if (Name == "loop" || Name == "loop-mssa") { LoopPassManager LPM(DebugLogging); - if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass, - DebugLogging)) + if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, DebugLogging)) return Err; // Add the nested pass manager with the appropriate adaptor. 
bool UseMemorySSA = (Name == "loop-mssa"); @@ -2460,8 +2457,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, } if (auto Count = parseRepeatPassName(Name)) { FunctionPassManager NestedFPM(DebugLogging); - if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseFunctionPassPipeline(NestedFPM, InnerPipeline, DebugLogging)) return Err; FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM))); return Error::success(); @@ -2533,7 +2530,7 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, } Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging) { + bool DebugLogging) { StringRef Name = E.Name; auto &InnerPipeline = E.InnerPipeline; @@ -2541,8 +2538,8 @@ Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, if (!InnerPipeline.empty()) { if (Name == "loop") { LoopPassManager NestedLPM(DebugLogging); - if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseLoopPassPipeline(NestedLPM, InnerPipeline, DebugLogging)) return Err; // Add the nested pass manager with the appropriate adaptor. LPM.addPass(std::move(NestedLPM)); @@ -2550,8 +2547,8 @@ Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, } if (auto Count = parseRepeatPassName(Name)) { LoopPassManager NestedLPM(DebugLogging); - if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline, - VerifyEachPass, DebugLogging)) + if (auto Err = + parseLoopPassPipeline(NestedLPM, InnerPipeline, DebugLogging)) return Err; LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM))); return Error::success(); @@ -2626,38 +2623,30 @@ bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) { Error PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM, ArrayRef Pipeline, - bool VerifyEachPass, bool DebugLogging) { for (const auto &Element : Pipeline) { - if (auto Err = parseLoopPass(LPM, Element, VerifyEachPass, DebugLogging)) + if (auto Err = parseLoopPass(LPM, Element, DebugLogging)) return Err; - // FIXME: No verifier support for Loop passes! } return Error::success(); } Error PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM, ArrayRef Pipeline, - bool VerifyEachPass, bool DebugLogging) { for (const auto &Element : Pipeline) { - if (auto Err = - parseFunctionPass(FPM, Element, VerifyEachPass, DebugLogging)) + if (auto Err = parseFunctionPass(FPM, Element, DebugLogging)) return Err; - if (VerifyEachPass) - FPM.addPass(VerifierPass()); } return Error::success(); } Error PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM, ArrayRef Pipeline, - bool VerifyEachPass, bool DebugLogging) { for (const auto &Element : Pipeline) { - if (auto Err = parseCGSCCPass(CGPM, Element, VerifyEachPass, DebugLogging)) + if (auto Err = parseCGSCCPass(CGPM, Element, DebugLogging)) return Err; - // FIXME: No verifier support for CGSCC passes! 
} return Error::success(); } @@ -2677,13 +2666,10 @@ void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM, Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM, ArrayRef Pipeline, - bool VerifyEachPass, bool DebugLogging) { for (const auto &Element : Pipeline) { - if (auto Err = parseModulePass(MPM, Element, VerifyEachPass, DebugLogging)) + if (auto Err = parseModulePass(MPM, Element, DebugLogging)) return Err; - if (VerifyEachPass) - MPM.addPass(VerifierPass()); } return Error::success(); } @@ -2693,7 +2679,7 @@ Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM, // pre-populate the analysis managers with target-specific stuff? Error PassBuilder::parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText, - bool VerifyEachPass, bool DebugLogging) { + bool DebugLogging) { auto Pipeline = parsePipelineText(PipelineText); if (!Pipeline || Pipeline->empty()) return make_error( @@ -2714,7 +2700,7 @@ Error PassBuilder::parsePassPipeline(ModulePassManager &MPM, Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}}; } else { for (auto &C : TopLevelPipelineParsingCallbacks) - if (C(MPM, *Pipeline, VerifyEachPass, DebugLogging)) + if (C(MPM, *Pipeline, DebugLogging)) return Error::success(); // Unknown pass or pipeline name! @@ -2727,8 +2713,7 @@ Error PassBuilder::parsePassPipeline(ModulePassManager &MPM, } } - if (auto Err = - parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging)) + if (auto Err = parseModulePassPipeline(MPM, *Pipeline, DebugLogging)) return Err; return Error::success(); } @@ -2736,7 +2721,7 @@ Error PassBuilder::parsePassPipeline(ModulePassManager &MPM, // Primary pass pipeline description parsing routine for a \c CGSCCPassManager Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText, - bool VerifyEachPass, bool DebugLogging) { + bool DebugLogging) { auto Pipeline = parsePipelineText(PipelineText); if (!Pipeline || Pipeline->empty()) return make_error( @@ -2751,8 +2736,7 @@ Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM, .str(), inconvertibleErrorCode()); - if (auto Err = - parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging)) + if (auto Err = parseCGSCCPassPipeline(CGPM, *Pipeline, DebugLogging)) return Err; return Error::success(); } @@ -2761,7 +2745,7 @@ Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM, // FunctionPassManager Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText, - bool VerifyEachPass, bool DebugLogging) { + bool DebugLogging) { auto Pipeline = parsePipelineText(PipelineText); if (!Pipeline || Pipeline->empty()) return make_error( @@ -2776,8 +2760,7 @@ Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM, .str(), inconvertibleErrorCode()); - if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass, - DebugLogging)) + if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline, DebugLogging)) return Err; return Error::success(); } @@ -2785,15 +2768,14 @@ Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM, // Primary pass pipeline description parsing routine for a \c LoopPassManager Error PassBuilder::parsePassPipeline(LoopPassManager &CGPM, StringRef PipelineText, - bool VerifyEachPass, bool DebugLogging) { + bool DebugLogging) { auto Pipeline = parsePipelineText(PipelineText); if (!Pipeline || Pipeline->empty()) return make_error( formatv("invalid pipeline '{0}'", PipelineText).str(), inconvertibleErrorCode()); - if (auto Err = - parseLoopPassPipeline(CGPM, 
*Pipeline, VerifyEachPass, DebugLogging))
+  if (auto Err = parseLoopPassPipeline(CGPM, *Pipeline, DebugLogging))
     return Err;
 
   return Error::success();
@@ -2855,6 +2837,6 @@ bool PassBuilder::isAnalysisPassName(StringRef PassName) {
 
 void PassBuilder::registerParseTopLevelPipelineCallback(
     const std::function<bool(ModulePassManager &, ArrayRef<PipelineElement>,
-                             bool VerifyEachPass, bool DebugLogging)> &C) {
+                             bool DebugLogging)> &C) {
   TopLevelPipelineParsingCallbacks.push_back(C);
 }
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 06aa3868fd6d4..3b591c021a7c3 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassInstrumentation.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormatVariadic.h"
@@ -233,19 +234,19 @@ void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner,
 }
 
 // Return true when this is a pass for which changes should be ignored
-inline bool isIgnored(StringRef PassID) {
+bool isIgnored(StringRef PassID) {
   return isSpecialPass(PassID,
                        {"PassManager", "PassAdaptor", "AnalysisManagerProxy"});
 }
 
 // Return true when this is a defined function for which printing
 // of changes is desired.
-inline bool isInterestingFunction(const Function &F) {
+bool isInterestingFunction(const Function &F) {
   return llvm::isFunctionInPrintList(F.getName());
 }
 
 // Return true when this is a pass for which printing of changes is desired.
-inline bool isInterestingPass(StringRef PassID) {
+bool isInterestingPass(StringRef PassID) {
   if (isIgnored(PassID))
     return false;
 
@@ -724,6 +725,42 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks(
       });
 }
 
+void VerifyInstrumentation::registerCallbacks(
+    PassInstrumentationCallbacks &PIC) {
+  PIC.registerAfterPassCallback(
+      [this](StringRef P, Any IR, const PreservedAnalyses &PassPA) {
+        if (isIgnored(P) || P == "VerifierPass")
+          return;
+        if (any_isa<const Function *>(IR) || any_isa<const Loop *>(IR)) {
+          const Function *F;
+          if (any_isa<const Loop *>(IR))
+            F = any_cast<const Loop *>(IR)->getHeader()->getParent();
+          else
+            F = any_cast<const Function *>(IR);
+          if (DebugLogging)
+            dbgs() << "Verifying function " << F->getName() << "\n";
+
+          if (verifyFunction(*F))
+            report_fatal_error("Broken function found, compilation aborted!");
+        } else if (any_isa<const Module *>(IR) ||
+                   any_isa<const LazyCallGraph::SCC *>(IR)) {
+          const Module *M;
+          if (any_isa<const LazyCallGraph::SCC *>(IR))
+            M = any_cast<const LazyCallGraph::SCC *>(IR)
+                    ->begin()
+                    ->getFunction()
+                    .getParent();
+          else
+            M = any_cast<const Module *>(IR);
+          if (DebugLogging)
+            dbgs() << "Verifying module " << M->getName() << "\n";
+
+          if (verifyModule(*M))
+            report_fatal_error("Broken module found, compilation aborted!");
+        }
+      });
+}
+
 void StandardInstrumentations::registerCallbacks(
     PassInstrumentationCallbacks &PIC) {
   PrintIR.registerCallbacks(PIC);
@@ -732,4 +769,6 @@ void StandardInstrumentations::registerCallbacks(
   OptNone.registerCallbacks(PIC);
   PreservedCFGChecker.registerCallbacks(PIC);
   PrintChangedIR.registerCallbacks(PIC);
+  if (VerifyEach)
+    Verify.registerCallbacks(PIC);
 }
diff --git a/llvm/test/Other/new-pass-manager-verify-each.ll b/llvm/test/Other/new-pass-manager-verify-each.ll
new file mode 100644
index 0000000000000..1af48be8d1200
--- /dev/null
+++ b/llvm/test/Other/new-pass-manager-verify-each.ll
@@ -0,0 +1,39 @@
+; RUN: opt -disable-output -debug-pass-manager -verify-each -passes='no-op-module,verify,cgscc(no-op-cgscc,function(no-op-function,loop(no-op-loop)))' %s 2>&1 | FileCheck %s
+
+; Added manually by opt
at beginning +; CHECK: Running pass: VerifierPass + +; CHECK: Running pass: NoOpModulePass +; CHECK: Verifying module +; CHECK-NOT: Verifying module +; CHECK: Running pass: NoOpCGSCCPass +; CHECK: Verifying module +; CHECK-NOT: Verifying module +; CHECK: Running pass: NoOpFunctionPass +; CHECK: Verifying function foo +; CHECK: Running pass: LoopSimplifyPass +; CHECK: Verifying function foo +; CHECK: Running pass: LCSSAPass +; CHECK: Verifying function foo +; CHECK: Running pass: NoOpLoopPass +; CHECK: Verifying function foo +; CHECK-NOT: Verifying function +; CHECK-NOT: Verifying module + +; Added manually by opt at end +; CHECK: Running pass: VerifierPass + +define void @foo(i1 %x, i8* %p1, i8* %p2) { +entry: + store i8 42, i8* %p1 + br i1 %x, label %loop, label %exit + +loop: + %tmp1 = load i8, i8* %p2 + br label %loop + +exit: + ret void +} + +declare void @bar() diff --git a/llvm/test/Other/new-pass-manager.ll b/llvm/test/Other/new-pass-manager.ll index 70d1f7152120a..66cf4fb111e92 100644 --- a/llvm/test/Other/new-pass-manager.ll +++ b/llvm/test/Other/new-pass-manager.ll @@ -104,19 +104,6 @@ ; RUN: | llvm-dis \ ; RUN: | FileCheck %s --check-prefix=CHECK-NOOP -; RUN: opt -disable-output -debug-pass-manager -verify-each -passes='no-op-module,function(no-op-function)' %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-VERIFY-EACH -; CHECK-VERIFY-EACH: Starting llvm::Module pass manager run -; CHECK-VERIFY-EACH: Running pass: VerifierPass -; CHECK-VERIFY-EACH: Running pass: NoOpModulePass -; CHECK-VERIFY-EACH: Running pass: VerifierPass -; CHECK-VERIFY-EACH: Starting llvm::Function pass manager run -; CHECK-VERIFY-EACH: Running pass: NoOpFunctionPass -; CHECK-VERIFY-EACH: Running pass: VerifierPass -; CHECK-VERIFY-EACH: Finished llvm::Function pass manager run -; CHECK-VERIFY-EACH: Running pass: VerifierPass -; CHECK-VERIFY-EACH: Finished llvm::Module pass manager run - ; RUN: opt -disable-output -debug-pass-manager -disable-verify -passes='no-op-module,function(no-op-function)' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-NO-VERIFY ; CHECK-NO-VERIFY: Starting llvm::Module pass manager run diff --git a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp index 4c672bc2a3bd4..40aac7c3a2814 100644 --- a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp +++ b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp @@ -148,7 +148,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { PB.registerLoopAnalyses(LAM); PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); - auto Err = PB.parsePassPipeline(MPM, PassPipeline, false, false); + auto Err = PB.parsePassPipeline(MPM, PassPipeline, false); assert(!Err && "Should have been checked during fuzzer initialization"); // Only fail with assert above, otherwise ignore the parsing error. 
consumeError(std::move(Err)); @@ -241,7 +241,7 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize( PassBuilder PB(TM.get()); ModulePassManager MPM; - if (auto Err = PB.parsePassPipeline(MPM, PassPipeline, false, false)) { + if (auto Err = PB.parsePassPipeline(MPM, PassPipeline, false)) { errs() << *argv[0] << ": " << toString(std::move(Err)) << "\n"; exit(1); } diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index f01d33efe45ad..98a8454f95289 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -140,72 +140,68 @@ bool tryParsePipelineText(PassBuilder &PB, /// If one of the EPPipeline command line options was given, register callbacks /// for parsing and inserting the given pipeline -static void registerEPCallbacks(PassBuilder &PB, bool VerifyEachPass, - bool DebugLogging) { +static void registerEPCallbacks(PassBuilder &PB, bool DebugLogging) { if (tryParsePipelineText(PB, PeepholeEPPipeline)) PB.registerPeepholeEPCallback( - [&PB, VerifyEachPass, DebugLogging]( - FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) { + [&PB, DebugLogging](FunctionPassManager &PM, + PassBuilder::OptimizationLevel Level) { ExitOnError Err("Unable to parse PeepholeEP pipeline: "); - Err(PB.parsePassPipeline(PM, PeepholeEPPipeline, VerifyEachPass, - DebugLogging)); + Err(PB.parsePassPipeline(PM, PeepholeEPPipeline, DebugLogging)); }); if (tryParsePipelineText(PB, LateLoopOptimizationsEPPipeline)) PB.registerLateLoopOptimizationsEPCallback( - [&PB, VerifyEachPass, DebugLogging]( - LoopPassManager &PM, PassBuilder::OptimizationLevel Level) { + [&PB, DebugLogging](LoopPassManager &PM, + PassBuilder::OptimizationLevel Level) { ExitOnError Err("Unable to parse LateLoopOptimizationsEP pipeline: "); Err(PB.parsePassPipeline(PM, LateLoopOptimizationsEPPipeline, - VerifyEachPass, DebugLogging)); + DebugLogging)); }); if (tryParsePipelineText(PB, LoopOptimizerEndEPPipeline)) PB.registerLoopOptimizerEndEPCallback( - [&PB, VerifyEachPass, DebugLogging]( - LoopPassManager &PM, PassBuilder::OptimizationLevel Level) { + [&PB, DebugLogging](LoopPassManager &PM, + PassBuilder::OptimizationLevel Level) { ExitOnError Err("Unable to parse LoopOptimizerEndEP pipeline: "); Err(PB.parsePassPipeline(PM, LoopOptimizerEndEPPipeline, - VerifyEachPass, DebugLogging)); + DebugLogging)); }); if (tryParsePipelineText(PB, ScalarOptimizerLateEPPipeline)) PB.registerScalarOptimizerLateEPCallback( - [&PB, VerifyEachPass, DebugLogging]( - FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) { + [&PB, DebugLogging](FunctionPassManager &PM, + PassBuilder::OptimizationLevel Level) { ExitOnError Err("Unable to parse ScalarOptimizerLateEP pipeline: "); Err(PB.parsePassPipeline(PM, ScalarOptimizerLateEPPipeline, - VerifyEachPass, DebugLogging)); + DebugLogging)); }); if (tryParsePipelineText(PB, CGSCCOptimizerLateEPPipeline)) PB.registerCGSCCOptimizerLateEPCallback( - [&PB, VerifyEachPass, DebugLogging]( - CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) { + [&PB, DebugLogging](CGSCCPassManager &PM, + PassBuilder::OptimizationLevel Level) { ExitOnError Err("Unable to parse CGSCCOptimizerLateEP pipeline: "); Err(PB.parsePassPipeline(PM, CGSCCOptimizerLateEPPipeline, - VerifyEachPass, DebugLogging)); + DebugLogging)); }); if (tryParsePipelineText(PB, VectorizerStartEPPipeline)) PB.registerVectorizerStartEPCallback( - [&PB, VerifyEachPass, DebugLogging]( - FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) { + [&PB, 
DebugLogging](FunctionPassManager &PM, + PassBuilder::OptimizationLevel Level) { ExitOnError Err("Unable to parse VectorizerStartEP pipeline: "); Err(PB.parsePassPipeline(PM, VectorizerStartEPPipeline, - VerifyEachPass, DebugLogging)); + DebugLogging)); }); if (tryParsePipelineText(PB, PipelineStartEPPipeline)) PB.registerPipelineStartEPCallback( - [&PB, VerifyEachPass, DebugLogging](ModulePassManager &PM) { + [&PB, DebugLogging](ModulePassManager &PM) { ExitOnError Err("Unable to parse PipelineStartEP pipeline: "); - Err(PB.parsePassPipeline(PM, PipelineStartEPPipeline, VerifyEachPass, - DebugLogging)); + Err(PB.parsePassPipeline(PM, PipelineStartEPPipeline, DebugLogging)); }); if (tryParsePipelineText(PB, OptimizerLastEPPipeline)) PB.registerOptimizerLastEPCallback( - [&PB, VerifyEachPass, DebugLogging](ModulePassManager &PM, - PassBuilder::OptimizationLevel) { + [&PB, DebugLogging](ModulePassManager &PM, + PassBuilder::OptimizationLevel) { ExitOnError Err("Unable to parse OptimizerLastEP pipeline: "); - Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline, VerifyEachPass, - DebugLogging)); + Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline, DebugLogging)); }); } @@ -264,7 +260,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, } } PassInstrumentationCallbacks PIC; - StandardInstrumentations SI(DebugPM); + StandardInstrumentations SI(DebugPM, VerifyEachPass); SI.registerCallbacks(PIC); PipelineTuningOptions PTO; @@ -274,7 +270,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, PTO.LoopUnrolling = !DisableLoopUnrolling; PTO.Coroutines = Coroutines; PassBuilder PB(TM, PTO, P, &PIC); - registerEPCallbacks(PB, VerifyEachPass, DebugPM); + registerEPCallbacks(PB, DebugPM); // Load requested pass plugins and let them register pass builder callbacks for (auto &PluginFN : PassPlugins) { @@ -387,8 +383,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, if (!PassPipeline.empty()) { assert(Passes.empty() && "PassPipeline and Passes should not both contain passes"); - if (auto Err = - PB.parsePassPipeline(MPM, PassPipeline, VerifyEachPass, DebugPM)) { + if (auto Err = PB.parsePassPipeline(MPM, PassPipeline, DebugPM)) { errs() << Arg0 << ": " << toString(std::move(Err)) << "\n"; return false; } @@ -397,8 +392,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, std::string ModifiedPassName(PassName.begin(), PassName.end()); if (PB.isAnalysisPassName(PassName)) ModifiedPassName = "require<" + ModifiedPassName + ">"; - if (auto Err = PB.parsePassPipeline(MPM, ModifiedPassName, VerifyEachPass, - DebugPM)) { + if (auto Err = PB.parsePassPipeline(MPM, ModifiedPassName, DebugPM)) { errs() << Arg0 << ": " << toString(std::move(Err)) << "\n"; return false; } diff --git a/llvm/unittests/IR/PassBuilderCallbacksTest.cpp b/llvm/unittests/IR/PassBuilderCallbacksTest.cpp index 44c0fe9ffd166..83b9ae6239326 100644 --- a/llvm/unittests/IR/PassBuilderCallbacksTest.cpp +++ b/llvm/unittests/IR/PassBuilderCallbacksTest.cpp @@ -502,7 +502,7 @@ TEST_F(ModuleCallbacksTest, Passes) { .WillOnce(Invoke(getAnalysisResult)); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); @@ -547,7 +547,7 @@ TEST_F(ModuleCallbacksTest, InstrumentedPasses) { .Times(0); StringRef PipelineText = "test-transform"; - 
ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); @@ -663,7 +663,7 @@ TEST_F(ModuleCallbacksTest, InstrumentedSkippedPasses) { StringRef PipelineText = "test-transform,function(test-transform),cgscc(" "function(loop(test-transform)))"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); @@ -675,7 +675,7 @@ TEST_F(FunctionCallbacksTest, Passes) { .WillOnce(Invoke(getAnalysisResult)); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -725,7 +725,7 @@ TEST_F(FunctionCallbacksTest, InstrumentedPasses) { .Times(0); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -771,7 +771,7 @@ TEST_F(FunctionCallbacksTest, InstrumentedSkippedPasses) { .Times(0); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -782,7 +782,7 @@ TEST_F(LoopCallbacksTest, Passes) { .WillOnce(WithArgs<0, 1, 2>(Invoke(getAnalysisResult))); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -833,7 +833,7 @@ TEST_F(LoopCallbacksTest, InstrumentedPasses) { .Times(0); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -881,7 +881,7 @@ TEST_F(LoopCallbacksTest, InstrumentedInvalidatingPasses) { .Times(0); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -925,7 +925,7 @@ TEST_F(LoopCallbacksTest, InstrumentedSkippedPasses) { .Times(0); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -936,7 +936,7 @@ TEST_F(CGSCCCallbacksTest, Passes) { .WillOnce(WithArgs<0, 1, 2>(Invoke(getAnalysisResult))); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -987,7 +987,7 @@ TEST_F(CGSCCCallbacksTest, InstrumentedPasses) { .Times(0); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + 
ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -1035,7 +1035,7 @@ TEST_F(CGSCCCallbacksTest, InstrumentedInvalidatingPasses) { .Times(0); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -1080,7 +1080,7 @@ TEST_F(CGSCCCallbacksTest, InstrumentedSkippedPasses) { .Times(0); StringRef PipelineText = "test-transform"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -1095,7 +1095,7 @@ TEST_F(ModuleCallbacksTest, AnalysisUtilities) { EXPECT_CALL(AnalysisHandle, invalidate(HasName(""), _, _)); StringRef PipelineText = "require,invalidate"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -1105,7 +1105,7 @@ TEST_F(CGSCCCallbacksTest, PassUtilities) { EXPECT_CALL(AnalysisHandle, invalidate(HasName("(foo)"), _, _)); StringRef PipelineText = "require,invalidate"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -1115,7 +1115,7 @@ TEST_F(FunctionCallbacksTest, AnalysisUtilities) { EXPECT_CALL(AnalysisHandle, invalidate(HasName("foo"), _, _)); StringRef PipelineText = "require,invalidate"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -1126,7 +1126,7 @@ TEST_F(LoopCallbacksTest, PassUtilities) { StringRef PipelineText = "require,invalidate"; - ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) << "Pipeline was: " << PipelineText; PM.run(*M, AM); } @@ -1139,25 +1139,27 @@ TEST_F(LoopCallbacksTest, PassUtilities) { /// This test parses a pipeline named 'another-pipeline', whose only elements /// may be the test-transform pass or the analysis utilities TEST_F(ModuleCallbacksTest, ParseTopLevelPipeline) { - PB.registerParseTopLevelPipelineCallback([this]( - ModulePassManager &MPM, ArrayRef Pipeline, - bool VerifyEachPass, bool DebugLogging) { - auto &FirstName = Pipeline.front().Name; - auto &InnerPipeline = Pipeline.front().InnerPipeline; - if (FirstName == "another-pipeline") { - for (auto &E : InnerPipeline) { - if (parseAnalysisUtilityPasses("test-analysis", E.Name, PM)) - continue; - - if (E.Name == "test-transform") { - PM.addPass(PassHandle.getPass()); - continue; + PB.registerParseTopLevelPipelineCallback( + [this](ModulePassManager &MPM, + ArrayRef Pipeline, + bool DebugLogging) { + auto &FirstName = Pipeline.front().Name; + auto &InnerPipeline = Pipeline.front().InnerPipeline; + if (FirstName == "another-pipeline") { + for (auto &E : InnerPipeline) { + if (parseAnalysisUtilityPasses("test-analysis", E.Name, + PM)) + continue; + + if (E.Name == "test-transform") { + PM.addPass(PassHandle.getPass()); + continue; + } + return false; + } } - return false; - } - } - return true; - }); + 
return true;
+      });
 
   EXPECT_CALL(AnalysisHandle, run(HasName("<string>"), _));
   EXPECT_CALL(PassHandle, run(HasName("<string>"), _))
@@ -1166,13 +1168,13 @@ TEST_F(ModuleCallbacksTest, ParseTopLevelPipeline) {
 
   StringRef PipelineText = "another-pipeline(test-transform,invalidate<test-analysis>)";
 
-  ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded())
+  ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded())
       << "Pipeline was: " << PipelineText;
   PM.run(*M, AM);
 
   /// Test the negative case
   PipelineText = "another-pipeline(instcombine)";
-  ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Failed())
+  ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Failed())
       << "Pipeline was: " << PipelineText;
 }
 } // end anonymous namespace
diff --git a/polly/lib/Support/RegisterPasses.cpp b/polly/lib/Support/RegisterPasses.cpp
index ad6edb5807cce..e9687df6e964b 100644
--- a/polly/lib/Support/RegisterPasses.cpp
+++ b/polly/lib/Support/RegisterPasses.cpp
@@ -678,7 +678,7 @@ static bool isScopPassName(StringRef Name) {
 static bool
 parseTopLevelPipeline(ModulePassManager &MPM,
                       ArrayRef<PassBuilder::PipelineElement> Pipeline,
-                      bool VerifyEachPass, bool DebugLogging) {
+                      bool DebugLogging) {
   std::vector<PassBuilder::PipelineElement> FullPipeline;
   StringRef FirstName = Pipeline.front().Name;
 
@@ -698,11 +698,7 @@ parseTopLevelPipeline(ModulePassManager &MPM,
   }
 
   FPM.addPass(createFunctionToScopPassAdaptor(std::move(SPM)));
-  if (VerifyEachPass)
-    FPM.addPass(VerifierPass());
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-  if (VerifyEachPass)
-    MPM.addPass(VerifierPass());
 
   return true;
 }

From a5ef2e0a1e3bf5a76b0c170ca7791564ab1a8375 Mon Sep 17 00:00:00 2001
From: Max Kazantsev
Date: Thu, 8 Oct 2020 10:50:44 +0700
Subject: [PATCH 290/321] Return "[SCEV] Prove implications via AddRec start"

The initial version of the patch was reverted because it missed the check
that the predicate being proved is actually guarded by this check on the
1st iteration. If the check was not executed on the 1st iteration (but
possibly executes after that), then it is incorrect to use reasoning about
the IV start to prove it.

Added the test where the miscompile was seen. Unfortunately, my attempts
to reduce it with bugpoint did not succeed; it can be reduced further once
we understand how to do so without losing the essence of the original bug.

Returning on the assumption that the miscompiles are now gone.

Differential Revision: https://reviews.llvm.org/D88208
---
 llvm/include/llvm/Analysis/ScalarEvolution.h  |  31 +-
 llvm/lib/Analysis/ScalarEvolution.cpp         | 110 ++++--
 .../addrec_no_exec_on_every_iteration.ll      | 365 ++++++++++++++++++
 .../Analysis/ScalarEvolutionTest.cpp          |  65 ++++
 4 files changed, 539 insertions(+), 32 deletions(-)
 create mode 100644 llvm/test/Transforms/IndVarSimplify/addrec_no_exec_on_every_iteration.ll

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index febca473776aa..158257a5aa9a1 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1677,23 +1677,30 @@ class ScalarEvolution {
   getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) const;
 
   /// Test whether the condition described by Pred, LHS, and RHS is true
-  /// whenever the given FoundCondValue value evaluates to true.
+  /// whenever the given FoundCondValue evaluates to true in the given
+  /// Context. If Context is nullptr, then the found predicate is true
+  /// everywhere.
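+  ///
+  /// Illustrative sketch (hypothetical IR, added for exposition; not part of
+  /// the original patch):
+  ///
+  ///   loop:
+  ///     %iv = phi i32 [ %start, %entry ], [ %iv.next, %backedge ]
+  ///     %c = icmp ne i32 %iv, 0     ; FoundCondValue
+  ///     br i1 %c, label %backedge, label %exit
+  ///
+  /// With a Context instruction that executes on the 1st iteration of the
+  /// loop, the known fact `%iv != 0` also holds for the start value %start,
+  /// which may let us prove the queried predicate (Pred, LHS, RHS).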
   bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
                      const SCEV *RHS,
-                     const Value *FoundCondValue, bool Inverse);
+                     const Value *FoundCondValue, bool Inverse,
+                     const Instruction *Context = nullptr);
 
   /// Test whether the condition described by Pred, LHS, and RHS is true
   /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is
-  /// true.
+  /// true in the given Context. If Context is nullptr, then the found
+  /// predicate is true everywhere.
   bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
                      const SCEV *RHS, ICmpInst::Predicate FoundPred,
                      const SCEV *FoundLHS,
-                     const SCEV *FoundRHS);
+                     const SCEV *FoundRHS,
+                     const Instruction *Context = nullptr);
 
   /// Test whether the condition described by Pred, LHS, and RHS is true
   /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
-  /// true.
+  /// true in the given Context. If Context is nullptr, then the found
+  /// predicate is true everywhere.
   bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS,
                              const SCEV *RHS, const SCEV *FoundLHS,
-                             const SCEV *FoundRHS);
+                             const SCEV *FoundRHS,
+                             const Instruction *Context = nullptr);
 
   /// Test whether the condition described by Pred, LHS, and RHS is true
   /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
@@ -1740,6 +1747,18 @@ class ScalarEvolution {
                                       const SCEV *FoundLHS,
                                       const SCEV *FoundRHS);
 
+  /// Test whether the condition described by Pred, LHS, and RHS is true
+  /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+  /// true.
+  ///
+  /// This routine tries to weaken the known condition based on the fact that
+  /// FoundLHS is an AddRec.
+  bool isImpliedCondOperandsViaAddRecStart(ICmpInst::Predicate Pred,
+                                           const SCEV *LHS, const SCEV *RHS,
+                                           const SCEV *FoundLHS,
+                                           const SCEV *FoundRHS,
+                                           const Instruction *Context);
+
   /// Test whether the condition described by Pred, LHS, and RHS is true
   /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
   /// true.
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f3764966f3017..79a18da679d2d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9549,15 +9549,16 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
 
   // Try to prove (Pred, LHS, RHS) using isImpliedCond.
auto ProveViaCond = [&](const Value *Condition, bool Inverse) { - if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse)) + const Instruction *Context = &BB->front(); + if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context)) return true; if (ProvingStrictComparison) { if (!ProvedNonStrictComparison) - ProvedNonStrictComparison = - isImpliedCond(NonStrictPredicate, LHS, RHS, Condition, Inverse); + ProvedNonStrictComparison = isImpliedCond(NonStrictPredicate, LHS, RHS, + Condition, Inverse, Context); if (!ProvedNonEquality) - ProvedNonEquality = - isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, Condition, Inverse); + ProvedNonEquality = isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, + Condition, Inverse, Context); if (ProvedNonStrictComparison && ProvedNonEquality) return true; } @@ -9623,7 +9624,8 @@ bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const Value *FoundCondValue, bool Inverse) { + const Value *FoundCondValue, bool Inverse, + const Instruction *Context) { if (!PendingLoopPredicates.insert(FoundCondValue).second) return false; @@ -9634,12 +9636,16 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (const BinaryOperator *BO = dyn_cast(FoundCondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) - return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || - isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, + Context) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, + Context); } else if (BO->getOpcode() == Instruction::Or) { if (Inverse) - return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || - isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, + Context) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, + Context); } } @@ -9657,14 +9663,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); - return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS); + return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context); } bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, - const SCEV *FoundLHS, - const SCEV *FoundRHS) { + const SCEV *FoundLHS, const SCEV *FoundRHS, + const Instruction *Context) { // Balance the types. if (getTypeSizeInBits(LHS->getType()) < getTypeSizeInBits(FoundLHS->getType())) { @@ -9708,16 +9714,16 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // Check whether the found predicate is the same as the desired predicate. if (FoundPred == Pred) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); // Check whether swapping the found predicate makes it the same as the // desired predicate. 
if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { if (isa(RHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context); else - return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), - RHS, LHS, FoundLHS, FoundRHS); + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), RHS, + LHS, FoundLHS, FoundRHS, Context); } // Unsigned comparison is the same as signed comparison when both the operands @@ -9725,7 +9731,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (CmpInst::isUnsigned(FoundPred) && CmpInst::getSignedPredicate(FoundPred) == Pred && isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); // Check if we can make progress by sharpening ranges. if (FoundPred == ICmpInst::ICMP_NE && @@ -9762,8 +9768,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, case ICmpInst::ICMP_UGE: // We know V `Pred` SharperMin. If this implies LHS `Pred` // RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, - getConstant(SharperMin))) + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin), + Context)) return true; LLVM_FALLTHROUGH; @@ -9778,7 +9784,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // // If V `Pred` Min implies LHS `Pred` RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min))) + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), + Context)) return true; break; @@ -9786,14 +9793,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_ULE: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(SharperMin))) + LHS, V, getConstant(SharperMin), Context)) return true; LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(Min))) + LHS, V, getConstant(Min), Context)) return true; break; @@ -9807,11 +9814,12 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // Check whether the actual condition is beyond sufficient. if (FoundPred == ICmpInst::ICMP_EQ) if (ICmpInst::isTrueWhenEqual(Pred)) - if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context)) return true; if (Pred == ICmpInst::ICMP_NE) if (!ICmpInst::isTrueWhenEqual(FoundPred)) - if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, + Context)) return true; // Otherwise assume the worst. @@ -9890,6 +9898,51 @@ Optional ScalarEvolution::computeConstantDifference(const SCEV *More, return None; } +bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) { + // Try to recognize the following pattern: + // + // FoundRHS = ... + // ... 
+ // loop: + // FoundLHS = {Start,+,W} + // context_bb: // Basic block from the same loop + // known(Pred, FoundLHS, FoundRHS) + // + // If some predicate is known in the context of a loop, it is also known on + // each iteration of this loop, including the first iteration. Therefore, in + // this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to + // prove the original pred using this fact. + if (!Context) + return false; + const BasicBlock *ContextBB = Context->getParent(); + // Make sure AR varies in the context block. + if (auto *AR = dyn_cast(FoundLHS)) { + const Loop *L = AR->getLoop(); + // Make sure that context belongs to the loop and executes on 1st iteration + // (if it ever executes at all). + if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch())) + return false; + if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop())) + return false; + return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS); + } + + if (auto *AR = dyn_cast(FoundRHS)) { + const Loop *L = AR->getLoop(); + // Make sure that context belongs to the loop and executes on 1st iteration + // (if it ever executes at all). + if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch())) + return false; + if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop())) + return false; + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart()); + } + + return false; +} + bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS) { @@ -10080,13 +10133,18 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, - const SCEV *FoundRHS) { + const SCEV *FoundRHS, + const Instruction *Context) { if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; + if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS, + Context)) + return true; + return isImpliedCondOperandsHelper(Pred, LHS, RHS, FoundLHS, FoundRHS) || // ~x < ~y --> x > y diff --git a/llvm/test/Transforms/IndVarSimplify/addrec_no_exec_on_every_iteration.ll b/llvm/test/Transforms/IndVarSimplify/addrec_no_exec_on_every_iteration.ll new file mode 100644 index 0000000000000..640edb528789e --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/addrec_no_exec_on_every_iteration.ll @@ -0,0 +1,365 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -indvars -S | FileCheck %s +; RUN: opt < %s -passes=indvars -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind uwtable +define void @test(i8* nocapture readnone %arg, i8* noalias nocapture readnone %arg1, i8** noalias nocapture readnone %arg2, i8** noalias nocapture readonly %arg3, i64* noalias nocapture readnone %arg4) local_unnamed_addr #0 { +; CHECK-LABEL: @test +bb: + %tmp = bitcast i8** %arg3 to [1 x [4 x [10 x [5 x float]]]]** + %tmp5 = load [1 x [4 x [10 x [5 x float]]]]*, [1 x [4 x [10 x [5 x float]]]]** %tmp, align 8, !invariant.load !0, !dereferenceable !1, !align !2 + %tmp6 = getelementptr inbounds i8*, i8** %arg3, i64 3 + %tmp7 = load i8*, i8** %tmp6, align 8, !invariant.load !0, 
!dereferenceable !3, !align !2 + %tmp8 = bitcast i8* %tmp7 to [10 x [5 x [2 x [1 x [2 x float]]]]]* + br label %bb9 + +bb9: ; preds = %bb33, %bb + %tmp10 = phi i64 [ 0, %bb ], [ %tmp34, %bb33 ] + %tmp11 = sub nsw i64 9, %tmp10 + br label %bb12 + +bb12: ; preds = %bb30, %bb9 + %tmp13 = phi i64 [ 0, %bb9 ], [ %tmp31, %bb30 ] + %tmp14 = sub nsw i64 4, %tmp13 + br label %bb15 + +bb15: ; preds = %bb27, %bb12 + %tmp16 = phi i64 [ 0, %bb12 ], [ %tmp28, %bb27 ] + %tmp17 = mul i64 %tmp16, -2 + %tmp18 = add i64 %tmp17, 2 + br label %bb19 + +bb19: ; preds = %bb19, %bb15 + %tmp20 = phi i64 [ 0, %bb15 ], [ %tmp25, %bb19 ] + %tmp21 = add nuw nsw i64 %tmp18, %tmp20 + %tmp22 = getelementptr inbounds [1 x [4 x [10 x [5 x float]]]], [1 x [4 x [10 x [5 x float]]]]* %tmp5, i64 0, i64 0, i64 %tmp21, i64 %tmp11, i64 %tmp14 + %tmp23 = load float, float* %tmp22, align 4, !invariant.load !0, !noalias !4 + %tmp24 = getelementptr inbounds [10 x [5 x [2 x [1 x [2 x float]]]]], [10 x [5 x [2 x [1 x [2 x float]]]]]* %tmp8, i64 0, i64 %tmp10, i64 %tmp13, i64 %tmp16, i64 0, i64 %tmp20 + store float %tmp23, float* %tmp24, align 4, !alias.scope !4, !noalias !7 + %tmp25 = add nuw nsw i64 %tmp20, 1 + %tmp26 = icmp eq i64 %tmp20, 0 + br i1 %tmp26, label %bb19, label %bb27 + +bb27: ; preds = %bb19 + %tmp28 = add nuw nsw i64 %tmp16, 1 + %tmp29 = icmp eq i64 %tmp16, 0 + br i1 %tmp29, label %bb15, label %bb30 + +bb30: ; preds = %bb27 + %tmp31 = add nuw nsw i64 %tmp13, 1 + %tmp32 = icmp ugt i64 %tmp13, 3 + br i1 %tmp32, label %bb33, label %bb12 + +bb33: ; preds = %bb30 + %tmp34 = add nuw nsw i64 %tmp10, 1 + %tmp35 = icmp ugt i64 %tmp10, 8 + br i1 %tmp35, label %bb36, label %bb9 + +bb36: ; preds = %bb33 + %tmp37 = getelementptr inbounds i8*, i8** %arg3, i64 1 + %tmp38 = bitcast i8** %tmp37 to [1 x [4 x [6 x [7 x float]]]]** + %tmp39 = load [1 x [4 x [6 x [7 x float]]]]*, [1 x [4 x [6 x [7 x float]]]]** %tmp38, align 8, !invariant.load !0, !dereferenceable !10, !align !2 + %tmp40 = getelementptr inbounds i8, i8* %tmp7, i64 800 + %tmp41 = bitcast i8* %tmp40 to [2 x [6 x [7 x [2 x [1 x float]]]]]* + br label %bb42 + +bb42: ; preds = %bb63, %bb36 + %tmp43 = phi i64 [ 0, %bb36 ], [ %tmp64, %bb63 ] + br label %bb44 + +bb44: ; preds = %bb60, %bb42 + %tmp45 = phi i64 [ 0, %bb42 ], [ %tmp61, %bb60 ] + br label %bb46 + +bb46: ; preds = %bb57, %bb44 + %tmp47 = phi i64 [ 0, %bb44 ], [ %tmp58, %bb57 ] + br label %bb48 + +bb48: ; preds = %bb48, %bb46 + %tmp49 = phi i64 [ 0, %bb46 ], [ %tmp55, %bb48 ] + %tmp50 = shl nuw nsw i64 %tmp49, 1 + %tmp51 = add nuw nsw i64 %tmp50, %tmp43 + %tmp52 = getelementptr inbounds [1 x [4 x [6 x [7 x float]]]], [1 x [4 x [6 x [7 x float]]]]* %tmp39, i64 0, i64 0, i64 %tmp51, i64 %tmp45, i64 %tmp47 + %tmp53 = load float, float* %tmp52, align 4, !invariant.load !0, !noalias !11 + %tmp54 = getelementptr inbounds [2 x [6 x [7 x [2 x [1 x float]]]]], [2 x [6 x [7 x [2 x [1 x float]]]]]* %tmp41, i64 0, i64 %tmp43, i64 %tmp45, i64 %tmp47, i64 %tmp49, i64 0 + store float %tmp53, float* %tmp54, align 4, !alias.scope !11, !noalias !12 + %tmp55 = add nuw nsw i64 %tmp49, 1 + %tmp56 = icmp eq i64 %tmp49, 0 + br i1 %tmp56, label %bb48, label %bb57 + +bb57: ; preds = %bb48 + %tmp58 = add nuw nsw i64 %tmp47, 1 + %tmp59 = icmp ugt i64 %tmp47, 5 + br i1 %tmp59, label %bb60, label %bb46 + +bb60: ; preds = %bb57 + %tmp61 = add nuw nsw i64 %tmp45, 1 + %tmp62 = icmp ugt i64 %tmp45, 4 + br i1 %tmp62, label %bb63, label %bb44 + +bb63: ; preds = %bb60 + %tmp64 = add nuw nsw i64 %tmp43, 1 + %tmp65 = icmp eq i64 %tmp43, 0 + br i1 %tmp65, 
label %bb42, label %bb66 + +bb66: ; preds = %bb63 + %tmp67 = getelementptr inbounds i8, i8* %tmp7, i64 1472 + %tmp68 = bitcast i8* %tmp67 to [2 x [1 x [2 x [2 x [2 x float]]]]]* + br label %bb69 + +bb69: ; preds = %bb140, %bb66 + %tmp70 = phi i64 [ 0, %bb66 ], [ %tmp141, %bb140 ] + br label %bb71 + +bb71: ; preds = %bb137, %bb69 + %tmp72 = phi i64 [ 0, %bb69 ], [ %tmp138, %bb137 ] + %tmp73 = shl nuw nsw i64 %tmp72, 1 + %tmp74 = add nsw i64 %tmp73, -2 + br label %bb75 + +bb75: ; preds = %bb134, %bb71 + %tmp76 = phi i64 [ 0, %bb71 ], [ %tmp135, %bb134 ] + %tmp77 = add nsw i64 %tmp76, -1 + br label %bb78 + +bb78: ; preds = %bb129, %bb75 + %tmp79 = phi i64 [ 0, %bb75 ], [ %tmp132, %bb129 ] + br label %bb80 + +bb80: ; preds = %bb125, %bb78 + %tmp81 = phi float [ 0.000000e+00, %bb78 ], [ %tmp126, %bb125 ] + %tmp82 = phi i64 [ 0, %bb78 ], [ %tmp127, %bb125 ] + %tmp83 = shl nuw nsw i64 %tmp82, 1 + %tmp84 = add nsw i64 %tmp83, -1 + %tmp85 = icmp ult i64 %tmp84, 10 + %tmp86 = sub nsw i64 5, %tmp82 + br i1 %tmp85, label %bb88, label %bb87 + +bb87: ; preds = %bb80 + br label %bb124 + +bb88: ; preds = %bb80 + br label %bb89 + +bb89: ; preds = %bb100, %bb88 + %tmp90 = phi float [ %tmp101, %bb100 ], [ %tmp81, %bb88 ] + %tmp91 = phi i64 [ %tmp102, %bb100 ], [ 0, %bb88 ] + %tmp92 = add i64 %tmp74, %tmp91 + %tmp93 = icmp ult i64 %tmp92, 5 + %tmp94 = sub nsw i64 6, %tmp91 + br i1 %tmp93, label %bb96, label %bb95 + +bb95: ; preds = %bb89 + br label %bb99 + +bb96: ; preds = %bb89 + br label %bb104 + +bb97: ; preds = %bb110 + %tmp98 = phi float [ %tmp111, %bb110 ] + br label %bb100 + +bb99: ; preds = %bb95 + br label %bb100 + +bb100: ; preds = %bb99, %bb97 + %tmp101 = phi float [ %tmp98, %bb97 ], [ %tmp90, %bb99 ] + %tmp102 = add nuw nsw i64 %tmp91, 1 + %tmp103 = icmp ugt i64 %tmp91, 5 + br i1 %tmp103, label %bb122, label %bb89 + +bb104: ; preds = %bb110, %bb96 + %tmp105 = phi float [ %tmp111, %bb110 ], [ %tmp90, %bb96 ] + %tmp106 = phi i64 [ %tmp112, %bb110 ], [ 0, %bb96 ] + %tmp107 = shl nuw nsw i64 %tmp106, 1 + ; CHECK-NOT: %bugged = add nuw nsw + ; CHECK: %bugged = add nsw + %bugged = add i64 %tmp77, %tmp107 + %tmp109 = icmp ult i64 %bugged, 2 + br i1 %tmp109, label %bb114, label %bb110 + +bb110: ; preds = %bb114, %bb104 + %tmp111 = phi float [ %tmp121, %bb114 ], [ %tmp105, %bb104 ] + %tmp112 = add nuw nsw i64 %tmp106, 1 + %tmp113 = icmp eq i64 %tmp106, 0 + br i1 %tmp113, label %bb104, label %bb97 + +bb114: ; preds = %bb104 + %tmp115 = sub nsw i64 1, %tmp106 + %tmp116 = getelementptr inbounds [2 x [6 x [7 x [2 x [1 x float]]]]], [2 x [6 x [7 x [2 x [1 x float]]]]]* %tmp41, i64 0, i64 %tmp70, i64 %tmp86, i64 %tmp94, i64 %tmp115, i64 0 + %tmp117 = getelementptr inbounds [10 x [5 x [2 x [1 x [2 x float]]]]], [10 x [5 x [2 x [1 x [2 x float]]]]]* %tmp8, i64 0, i64 %tmp84, i64 %tmp92, i64 %bugged, i64 0, i64 %tmp79 + %tmp118 = load float, float* %tmp117, align 4, !alias.scope !4, !noalias !7 + %tmp119 = load float, float* %tmp116, align 4, !alias.scope !11, !noalias !12 + %tmp120 = fmul reassoc nsz contract float %tmp118, %tmp119 + %tmp121 = fadd reassoc nsz contract float %tmp105, %tmp120 + br label %bb110 + +bb122: ; preds = %bb100 + %tmp123 = phi float [ %tmp101, %bb100 ] + br label %bb125 + +bb124: ; preds = %bb87 + br label %bb125 + +bb125: ; preds = %bb124, %bb122 + %tmp126 = phi float [ %tmp123, %bb122 ], [ %tmp81, %bb124 ] + %tmp127 = add nuw nsw i64 %tmp82, 1 + %tmp128 = icmp ugt i64 %tmp82, 4 + br i1 %tmp128, label %bb129, label %bb80 + +bb129: ; preds = %bb125 + %tmp130 = phi float [ %tmp126, %bb125 ] 
+ %tmp131 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 %tmp70, i64 0, i64 %tmp72, i64 %tmp76, i64 %tmp79 + store float %tmp130, float* %tmp131, align 4, !alias.scope !13, !noalias !14 + %tmp132 = add nuw nsw i64 %tmp79, 1 + %tmp133 = icmp eq i64 %tmp79, 0 + br i1 %tmp133, label %bb78, label %bb134 + +bb134: ; preds = %bb129 + %tmp135 = add nuw nsw i64 %tmp76, 1 + %tmp136 = icmp eq i64 %tmp76, 0 + br i1 %tmp136, label %bb75, label %bb137 + +bb137: ; preds = %bb134 + %tmp138 = add nuw nsw i64 %tmp72, 1 + %tmp139 = icmp eq i64 %tmp72, 0 + br i1 %tmp139, label %bb71, label %bb140 + +bb140: ; preds = %bb137 + %tmp141 = add nuw nsw i64 %tmp70, 1 + %tmp142 = icmp eq i64 %tmp70, 0 + br i1 %tmp142, label %bb69, label %bb143 + +bb143: ; preds = %bb140 + %tmp144 = getelementptr inbounds i8*, i8** %arg3, i64 2 + %tmp145 = bitcast i8** %tmp144 to [4 x [2 x [1 x [2 x float]]]]** + %tmp146 = load [4 x [2 x [1 x [2 x float]]]]*, [4 x [2 x [1 x [2 x float]]]]** %tmp145, align 8, !invariant.load !0, !dereferenceable !16, !align !2 + br label %bb147 + +bb147: ; preds = %bb143 + br label %bb148 + +bb148: ; preds = %bb147 + br label %bb149 + +bb149: ; preds = %bb148 + %tmp150 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0 + %tmp151 = load float, float* %tmp150, align 4, !alias.scope !13, !noalias !14 + %tmp152 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 0, i64 0, i64 0, i64 0 + store float %tmp151, float* %tmp152, align 4, !alias.scope !17, !noalias !13 + %tmp153 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 0, i64 0, i64 1, i64 0, i64 0 + %tmp154 = load float, float* %tmp153, align 4, !alias.scope !13, !noalias !14 + %tmp155 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 0, i64 0, i64 0, i64 1 + store float %tmp154, float* %tmp155, align 4, !alias.scope !17, !noalias !13 + br label %bb156 + +bb156: ; preds = %bb149 + %tmp157 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 1, i64 0, i64 0, i64 0, i64 0 + %tmp158 = load float, float* %tmp157, align 4, !alias.scope !13, !noalias !14 + %tmp159 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 0, i64 1, i64 0, i64 0 + store float %tmp158, float* %tmp159, align 4, !alias.scope !17, !noalias !13 + %tmp160 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 1, i64 0, i64 1, i64 0, i64 0 + %tmp161 = load float, float* %tmp160, align 4, !alias.scope !13, !noalias !14 + %tmp162 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 0, i64 1, i64 0, i64 1 + store float %tmp161, float* %tmp162, align 4, !alias.scope !17, !noalias !13 + br label %bb163 + +bb163: ; preds = %bb156 + br label %bb164 + +bb164: ; preds = %bb163 + %tmp165 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 0, i64 0, i64 0, i64 0, i64 1 + %tmp166 = load float, float* %tmp165, align 4, !alias.scope !13, !noalias !14 + %tmp167 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 
1, i64 0, i64 0, i64 0 + store float %tmp166, float* %tmp167, align 4, !alias.scope !17, !noalias !13 + %tmp168 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 0, i64 0, i64 1, i64 0, i64 1 + %tmp169 = load float, float* %tmp168, align 4, !alias.scope !13, !noalias !14 + %tmp170 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 1, i64 0, i64 0, i64 1 + store float %tmp169, float* %tmp170, align 4, !alias.scope !17, !noalias !13 + br label %bb171 + +bb171: ; preds = %bb164 + %tmp172 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 1, i64 0, i64 0, i64 0, i64 1 + %tmp173 = load float, float* %tmp172, align 4, !alias.scope !13, !noalias !14 + %tmp174 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 1, i64 1, i64 0, i64 0 + store float %tmp173, float* %tmp174, align 4, !alias.scope !17, !noalias !13 + %tmp175 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 1, i64 0, i64 1, i64 0, i64 1 + %tmp176 = load float, float* %tmp175, align 4, !alias.scope !13, !noalias !14 + %tmp177 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 1, i64 1, i64 0, i64 1 + store float %tmp176, float* %tmp177, align 4, !alias.scope !17, !noalias !13 + br label %bb178 + +bb178: ; preds = %bb171 + br label %bb179 + +bb179: ; preds = %bb178 + %tmp180 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 0, i64 0, i64 0, i64 1, i64 0 + %tmp181 = load float, float* %tmp180, align 4, !alias.scope !13, !noalias !14 + %tmp182 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 2, i64 0, i64 0, i64 0 + store float %tmp181, float* %tmp182, align 4, !alias.scope !17, !noalias !13 + %tmp183 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 0, i64 0, i64 1, i64 1, i64 0 + %tmp184 = load float, float* %tmp183, align 4, !alias.scope !13, !noalias !14 + %tmp185 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 2, i64 0, i64 0, i64 1 + store float %tmp184, float* %tmp185, align 4, !alias.scope !17, !noalias !13 + br label %bb186 + +bb186: ; preds = %bb179 + %tmp187 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 1, i64 0, i64 0, i64 1, i64 0 + %tmp188 = load float, float* %tmp187, align 4, !alias.scope !13, !noalias !14 + %tmp189 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 2, i64 1, i64 0, i64 0 + store float %tmp188, float* %tmp189, align 4, !alias.scope !17, !noalias !13 + %tmp190 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 1, i64 0, i64 1, i64 1, i64 0 + %tmp191 = load float, float* %tmp190, align 4, !alias.scope !13, !noalias !14 + %tmp192 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 2, i64 1, i64 0, i64 1 + store float %tmp191, float* %tmp192, align 4, !alias.scope !17, !noalias !13 + br label %bb193 + +bb193: ; preds = %bb186 + br label %bb194 + +bb194: 
; preds = %bb193 + %tmp195 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 0, i64 0, i64 0, i64 1, i64 1 + %tmp196 = load float, float* %tmp195, align 4, !alias.scope !13, !noalias !14 + %tmp197 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 3, i64 0, i64 0, i64 0 + store float %tmp196, float* %tmp197, align 4, !alias.scope !17, !noalias !13 + %tmp198 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 0, i64 0, i64 1, i64 1, i64 1 + %tmp199 = load float, float* %tmp198, align 4, !alias.scope !13, !noalias !14 + %tmp200 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 3, i64 0, i64 0, i64 1 + store float %tmp199, float* %tmp200, align 4, !alias.scope !17, !noalias !13 + br label %bb201 + +bb201: ; preds = %bb194 + %tmp202 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 1, i64 0, i64 0, i64 1, i64 1 + %tmp203 = load float, float* %tmp202, align 4, !alias.scope !13, !noalias !14 + %tmp204 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 3, i64 1, i64 0, i64 0 + store float %tmp203, float* %tmp204, align 4, !alias.scope !17, !noalias !13 + %tmp205 = getelementptr inbounds [2 x [1 x [2 x [2 x [2 x float]]]]], [2 x [1 x [2 x [2 x [2 x float]]]]]* %tmp68, i64 0, i64 1, i64 0, i64 1, i64 1, i64 1 + %tmp206 = load float, float* %tmp205, align 4, !alias.scope !13, !noalias !14 + %tmp207 = getelementptr inbounds [4 x [2 x [1 x [2 x float]]]], [4 x [2 x [1 x [2 x float]]]]* %tmp146, i64 0, i64 3, i64 1, i64 0, i64 1 + store float %tmp206, float* %tmp207, align 4, !alias.scope !17, !noalias !13 + ret void +} + +attributes #0 = { nofree norecurse nounwind uwtable "denormal-fp-math"="preserve-sign" "no-frame-pointer-elim"="false" } + +!0 = !{} +!1 = !{i64 800} +!2 = !{i64 16} +!3 = !{i64 1536} +!4 = !{!5} +!5 = !{!"buffer: {index:3, offset:0, size:800}", !6} +!6 = !{!"XLA global AA domain"} +!7 = !{!8, !9} +!8 = !{!"buffer: {index:3, offset:800, size:672}", !6} +!9 = !{!"buffer: {index:3, offset:1472, size:64}", !6} +!10 = !{i64 672} +!11 = !{!8} +!12 = !{!5, !9} +!13 = !{!9} +!14 = !{!15, !5, !8} +!15 = !{!"buffer: {index:2, offset:0, size:64}", !6} +!16 = !{i64 64} +!17 = !{!15} diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index ff33495f22711..be8941838f71a 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1251,4 +1251,69 @@ TEST_F(ScalarEvolutionsTest, SCEVgetExitLimitForGuardedLoop) { }); } +TEST_F(ScalarEvolutionsTest, ImpliedViaAddRecStart) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString( + "define void @foo(i32* %p) { " + "entry: " + " %x = load i32, i32* %p, !range !0 " + " br label %loop " + "loop: " + " %iv = phi i32 [ %x, %entry], [%iv.next, %backedge] " + " %ne.check = icmp ne i32 %iv, 0 " + " br i1 %ne.check, label %backedge, label %exit " + "backedge: " + " %iv.next = add i32 %iv, -1 " + " br label %loop " + "exit:" + " ret void " + "} " + "!0 = !{i32 0, i32 2147483647}", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, 
ScalarEvolution &SE) {
+    auto *X = SE.getSCEV(getInstructionByName(F, "x"));
+    auto *Context = getInstructionByName(F, "iv.next");
+    EXPECT_TRUE(SE.isKnownPredicateAt(ICmpInst::ICMP_NE, X,
+                                      SE.getZero(X->getType()), Context));
+  });
+}
+
+TEST_F(ScalarEvolutionsTest, UnsignedIsImpliedViaOperations) {
+  LLVMContext C;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M =
+      parseAssemblyString("define void @foo(i32* %p1, i32* %p2) { "
+                          "entry: "
+                          "  %x = load i32, i32* %p1, !range !0 "
+                          "  %cond = icmp ne i32 %x, 0 "
+                          "  br i1 %cond, label %guarded, label %exit "
+                          "guarded: "
+                          "  %y = add i32 %x, -1 "
+                          "  ret void "
+                          "exit: "
+                          "  ret void "
+                          "} "
+                          "!0 = !{i32 0, i32 2147483647}",
+                          Err, C);
+
+  ASSERT_TRUE(M && "Could not parse module?");
+  ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!");
+
+  runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+    auto *X = SE.getSCEV(getInstructionByName(F, "x"));
+    auto *Y = SE.getSCEV(getInstructionByName(F, "y"));
+    auto *Guarded = getInstructionByName(F, "y")->getParent();
+    ASSERT_TRUE(Guarded);
+    EXPECT_TRUE(
+        SE.isBasicBlockEntryGuardedByCond(Guarded, ICmpInst::ICMP_ULT, Y, X));
+    EXPECT_TRUE(
+        SE.isBasicBlockEntryGuardedByCond(Guarded, ICmpInst::ICMP_UGT, X, Y));
+  });
+}
+
 } // end namespace llvm

From b4ffc40d622bb2647852284c78c5e83346f2d630 Mon Sep 17 00:00:00 2001
From: Serge Guelton
Date: Wed, 7 Oct 2020 13:43:55 -0400
Subject: [PATCH 291/321] Update documentation and implementation of stage3 build

Have the build work out of the box by forcing an LLD build. That way, we
don't require an external LTO-aware linker, as we build one.

Also remove the reference to the seemingly dead builder.

Differential Revision: https://reviews.llvm.org/D88990
---
 clang/cmake/caches/3-stage-base.cmake | 9 +++++++++
 llvm/docs/AdvancedBuilds.rst          | 6 ++----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/clang/cmake/caches/3-stage-base.cmake b/clang/cmake/caches/3-stage-base.cmake
index 46c747edd7784..88ab5d77f16fd 100644
--- a/clang/cmake/caches/3-stage-base.cmake
+++ b/clang/cmake/caches/3-stage-base.cmake
@@ -3,6 +3,15 @@ set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "")
 set(LLVM_BUILD_EXTERNAL_COMPILER_RT ON CACHE BOOL "")
 set(BOOTSTRAP_LLVM_ENABLE_LTO ON CACHE BOOL "")
 
+# Use LLD to have fewer requirements on the system linker, unless we're on an
+# Apple platform, where the system linker is preferred.
+if(APPLE)
+  set(BOOTSTRAP_LLVM_ENABLE_LLD OFF CACHE BOOL "")
+else()
+  set(BOOTSTRAP_LLVM_ENABLE_LLD ON CACHE BOOL "")
+endif()
+
+
 set(CLANG_BOOTSTRAP_TARGETS
   clang
   check-all
diff --git a/llvm/docs/AdvancedBuilds.rst b/llvm/docs/AdvancedBuilds.rst
index 695dcfb62a1fd..1781726741f29 100644
--- a/llvm/docs/AdvancedBuilds.rst
+++ b/llvm/docs/AdvancedBuilds.rst
@@ -186,8 +186,6 @@ following commands:
 
 .. code-block:: console
 
   $ cmake -G Ninja -C <path-to-clang-sources>/cmake/caches/3-stage.cmake <path-to-llvm-sources>
-  $ ninja stage3
+  $ cmake --build . --target stage3 --parallel
 
-After the build you can compare the stage2 & stage3 compilers. We have a bot
-setup `here `_ that runs
-this build and compare configuration.
+After the build you can compare the stage2 & stage3 compilers.

From 9b2b32743d71311eeb713ace2550c0039e2491a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?=
Date: Tue, 6 Oct 2020 13:23:57 +0300
Subject: [PATCH 292/321] [LLD] [ELF] Fix up a comment regarding the --wrap
 option. NFC.

Add missing leading underscores to the __wrap_ and __real_ names.
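As a minimal sketch of the wrapping scheme the comment describes
(illustrative only; malloc is just an example symbol, not part of this
patch):

    // Linked with: ld.lld --wrap=malloc ...
    #include <cstddef>
    // References to __real_malloc resolve to the original malloc.
    extern "C" void *__real_malloc(std::size_t n);
    // With --wrap=malloc, every reference to malloc lands here instead.
    extern "C" void *__wrap_malloc(std::size_t n) {
      // Instrument as needed, then forward to the real implementation.
      return __real_malloc(n);
    }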
Differential Revision: https://reviews.llvm.org/D89008 --- lld/ELF/Driver.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index fa39628a21432..6aca2306d1e96 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1837,9 +1837,9 @@ template void LinkerDriver::compileBitcodeFiles() { // The --wrap option is a feature to rename symbols so that you can write // wrappers for existing functions. If you pass `-wrap=foo`, all -// occurrences of symbol `foo` are resolved to `wrap_foo` (so, you are -// expected to write `wrap_foo` function as a wrapper). The original -// symbol becomes accessible as `real_foo`, so you can call that from your +// occurrences of symbol `foo` are resolved to `__wrap_foo` (so, you are +// expected to write `__wrap_foo` function as a wrapper). The original +// symbol becomes accessible as `__real_foo`, so you can call that from your // wrapper. // // This data structure is instantiated for each -wrap option. From 9b58b0c06e6906583a1225e5c8c51dda9cbc7cfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Miku=C5=82a?= Date: Thu, 8 Oct 2020 09:34:18 +0300 Subject: [PATCH 293/321] [LLD] Ignore ELF tests when ld.lld defaults to MinGW Follow-up to D87418. Differential Revision: https://reviews.llvm.org/D88991 --- lld/test/CMakeLists.txt | 1 + lld/test/lit.cfg.py | 4 ++++ lld/test/lit.site.cfg.py.in | 1 + 3 files changed, 6 insertions(+) diff --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt index ff957e8912114..6875a54df99c9 100644 --- a/lld/test/CMakeLists.txt +++ b/lld/test/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/%(build_config)s" llvm_canonicalize_cmake_booleans( LLVM_ENABLE_ZLIB LLVM_ENABLE_LIBXML2 + LLD_DEFAULT_LD_LLD_IS_MINGW ) configure_lit_site_cfg( diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py index 090a7c21fa782..c031505e5c3e2 100644 --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -110,3 +110,7 @@ sout, _ = tar_version.communicate() if 'GNU tar' in sout.decode(): config.available_features.add('gnutar') + +# ELF tests expect the default target for ld.lld to be ELF. +if config.ld_lld_default_mingw: + config.excludes.append('ELF') diff --git a/lld/test/lit.site.cfg.py.in b/lld/test/lit.site.cfg.py.in index bbc2c892eb715..a4e00b9dac930 100644 --- a/lld/test/lit.site.cfg.py.in +++ b/lld/test/lit.site.cfg.py.in @@ -16,6 +16,7 @@ config.python_executable = "@Python3_EXECUTABLE@" config.have_zlib = @LLVM_ENABLE_ZLIB@ config.have_libxml2 = @LLVM_ENABLE_LIBXML2@ config.sizeof_void_p = @CMAKE_SIZEOF_VOID_P@ +config.ld_lld_default_mingw = @LLD_DEFAULT_LD_LLD_IS_MINGW@ # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. From 81b4f3380b01c95ebc821bfafe1a304520ed39be Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 8 Oct 2020 08:05:48 +0100 Subject: [PATCH 294/321] [ARM] Replace llvm.experimental.vector.reduce.smax with llvm.vector.reduce.smax. NFC This fixes up some newer tests after D88787. 
--- .../CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll | 120 +++++++++--------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll index 13b831efabc57..a581734794b7d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll @@ -7,7 +7,7 @@ define arm_aapcs_vfpcc zeroext i8 @uminv16i8(<16 x i8> %vec, i8 zeroext %min) { ; CHECK-NEXT: vminv.u8 r0, q0 ; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %vec) %cmp = icmp ult i8 %x, %min %1 = select i1 %cmp, i8 %x, i8 %min ret i8 %1 @@ -19,7 +19,7 @@ define arm_aapcs_vfpcc zeroext i16 @uminv8i16(<8 x i16> %vec, i16 zeroext %min) ; CHECK-NEXT: vminv.u16 r0, q0 ; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %vec) %cmp = icmp ult i16 %x, %min %1 = select i1 %cmp, i16 %x, i16 %min ret i16 %1 @@ -30,7 +30,7 @@ define arm_aapcs_vfpcc i32 @uminv4i32(<4 x i32> %vec, i32 %min) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.u32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vec) %cmp = icmp ult i32 %x, %min %1 = select i1 %cmp, i32 %x, i32 %min ret i32 %1 @@ -42,7 +42,7 @@ define arm_aapcs_vfpcc signext i8 @sminv16i8(<16 x i8> %vec, i8 signext %min) { ; CHECK-NEXT: vminv.s8 r0, q0 ; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %vec) %cmp = icmp slt i8 %x, %min %1 = select i1 %cmp, i8 %x, i8 %min ret i8 %1 @@ -54,7 +54,7 @@ define arm_aapcs_vfpcc signext i16 @sminv8i16(<8 x i16> %vec, i16 signext %min) ; CHECK-NEXT: vminv.s16 r0, q0 ; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %vec) %cmp = icmp slt i16 %x, %min %1 = select i1 %cmp, i16 %x, i16 %min ret i16 %1 @@ -65,7 +65,7 @@ define arm_aapcs_vfpcc i32 @sminv4i32(<4 x i32> %vec, i32 %min) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.s32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec) %cmp = icmp slt i32 %x, %min %1 = select i1 %cmp, i32 %x, i32 %min ret i32 %1 @@ -77,7 +77,7 @@ define arm_aapcs_vfpcc zeroext i8 @umaxv16i8(<16 x i8> %vec, i8 zeroext %max) { ; CHECK-NEXT: vmaxv.u8 r0, q0 ; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %vec) %cmp = icmp ugt i8 %x, %max %1 = select i1 %cmp, i8 %x, i8 %max ret i8 %1 @@ -89,7 +89,7 @@ define arm_aapcs_vfpcc zeroext i16 @umaxv8i16(<8 x i16> %vec, i16 zeroext %max) ; CHECK-NEXT: vmaxv.u16 r0, q0 ; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %vec) %cmp = icmp ugt i16 %x, %max %1 = select i1 %cmp, i16 %x, i16 %max ret i16 %1 @@ -100,7 +100,7 @@ define arm_aapcs_vfpcc i32 @umaxv4i32(<4 x i32> %vec, i32 %max) { ; 
CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.u32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vec) %cmp = icmp ugt i32 %x, %max %1 = select i1 %cmp, i32 %x, i32 %max ret i32 %1 @@ -112,7 +112,7 @@ define arm_aapcs_vfpcc signext i8 @smaxv16i8(<16 x i8> %vec, i8 signext %max) { ; CHECK-NEXT: vmaxv.s8 r0, q0 ; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %vec) %cmp = icmp sgt i8 %x, %max %1 = select i1 %cmp, i8 %x, i8 %max ret i8 %1 @@ -124,7 +124,7 @@ define arm_aapcs_vfpcc signext i16 @smaxv8i16(<8 x i16> %vec, i16 signext %max) ; CHECK-NEXT: vmaxv.s16 r0, q0 ; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %vec) %cmp = icmp sgt i16 %x, %max %1 = select i1 %cmp, i16 %x, i16 %max ret i16 %1 @@ -135,7 +135,7 @@ define arm_aapcs_vfpcc i32 @smaxv4i32(<4 x i32> %vec, i32 %max) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.s32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec) %cmp = icmp sgt i32 %x, %max %1 = select i1 %cmp, i32 %x, i32 %max ret i32 %1 @@ -147,7 +147,7 @@ define arm_aapcs_vfpcc zeroext i8 @commute_uminv16i8(<16 x i8> %vec, i8 zeroext ; CHECK-NEXT: vminv.u8 r0, q0 ; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %vec) %cmp = icmp ult i8 %min, %x %1 = select i1 %cmp, i8 %min, i8 %x ret i8 %1 @@ -159,7 +159,7 @@ define arm_aapcs_vfpcc zeroext i16 @commute_uminv8i16(<8 x i16> %vec, i16 zeroex ; CHECK-NEXT: vminv.u16 r0, q0 ; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %vec) %cmp = icmp ult i16 %min, %x %1 = select i1 %cmp, i16 %min, i16 %x ret i16 %1 @@ -170,7 +170,7 @@ define arm_aapcs_vfpcc i32 @commute_uminv4i32(<4 x i32> %vec, i32 %min) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.u32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vec) %cmp = icmp ult i32 %min, %x %1 = select i1 %cmp, i32 %min, i32 %x ret i32 %1 @@ -182,7 +182,7 @@ define arm_aapcs_vfpcc signext i8 @commute_sminv16i8(<16 x i8> %vec, i8 signext ; CHECK-NEXT: vminv.s8 r0, q0 ; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %vec) %cmp = icmp slt i8 %min, %x %1 = select i1 %cmp, i8 %min, i8 %x ret i8 %1 @@ -194,7 +194,7 @@ define arm_aapcs_vfpcc signext i16 @commute_sminv8i16(<8 x i16> %vec, i16 signex ; CHECK-NEXT: vminv.s16 r0, q0 ; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %vec) %cmp = icmp slt i16 %min, %x %1 = select i1 %cmp, i16 %min, i16 %x ret i16 %1 @@ -205,7 +205,7 @@ define arm_aapcs_vfpcc i32 @commute_sminv4i32(<4 x i32> %vec, i32 %min) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.s32 r0, q0 ; CHECK-NEXT: bx lr - %x = 
call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec) %cmp = icmp slt i32 %min, %x %1 = select i1 %cmp, i32 %min, i32 %x ret i32 %1 @@ -217,7 +217,7 @@ define arm_aapcs_vfpcc zeroext i8 @commute_umaxv16i8(<16 x i8> %vec, i8 zeroext ; CHECK-NEXT: vmaxv.u8 r0, q0 ; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %vec) %cmp = icmp ugt i8 %max, %x %1 = select i1 %cmp, i8 %max, i8 %x ret i8 %1 @@ -229,7 +229,7 @@ define arm_aapcs_vfpcc zeroext i16 @commute_umaxv8i16(<8 x i16> %vec, i16 zeroex ; CHECK-NEXT: vmaxv.u16 r0, q0 ; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %vec) %cmp = icmp ugt i16 %max, %x %1 = select i1 %cmp, i16 %max, i16 %x ret i16 %1 @@ -240,7 +240,7 @@ define arm_aapcs_vfpcc i32 @commute_umaxv4i32(<4 x i32> %vec, i32 %max) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.u32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vec) %cmp = icmp ugt i32 %max, %x %1 = select i1 %cmp, i32 %max, i32 %x ret i32 %1 @@ -252,7 +252,7 @@ define arm_aapcs_vfpcc signext i8 @commute_smaxv16i8(<16 x i8> %vec, i8 signext ; CHECK-NEXT: vmaxv.s8 r0, q0 ; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %vec) %cmp = icmp sgt i8 %max, %x %1 = select i1 %cmp, i8 %max, i8 %x ret i8 %1 @@ -264,7 +264,7 @@ define arm_aapcs_vfpcc signext i16 @commute_smaxv8i16(<8 x i16> %vec, i16 signex ; CHECK-NEXT: vmaxv.s16 r0, q0 ; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %vec) %cmp = icmp sgt i16 %max, %x %1 = select i1 %cmp, i16 %max, i16 %x ret i16 %1 @@ -275,7 +275,7 @@ define arm_aapcs_vfpcc i32 @commute_smaxv4i32(<4 x i32> %vec, i32 %max) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.s32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec) %cmp = icmp sgt i32 %max, %x %1 = select i1 %cmp, i32 %max, i32 %x ret i32 %1 @@ -291,7 +291,7 @@ define arm_aapcs_vfpcc signext i8 @mismatch_smaxv16i8(<16 x i8> %vec, i8 signext ; CHECK-NEXT: csel r0, r0, r1, gt ; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %vec) %cmp = icmp sgt i8 %x, %max %1 = select i1 %cmp, i8 %max, i8 %x ret i8 %1 @@ -307,7 +307,7 @@ define arm_aapcs_vfpcc signext i8 @mismatch2_smaxv16i8(<16 x i8> %vec, i8 signex ; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %vec) %cmp = icmp sgt i8 %max, %x %1 = select i1 %cmp, i8 %x, i8 %max ret i8 %1 @@ -319,7 +319,7 @@ define arm_aapcs_vfpcc zeroext i8 @inverted_uminv16i8(<16 x i8> %vec, i8 zeroext ; CHECK-NEXT: vminv.u8 r0, q0 ; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 
@llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %vec) %cmp = icmp ugt i8 %x, %min %1 = select i1 %cmp, i8 %min, i8 %x ret i8 %1 @@ -331,7 +331,7 @@ define arm_aapcs_vfpcc zeroext i16 @inverted_uminv8i16(<8 x i16> %vec, i16 zeroe ; CHECK-NEXT: vminv.u16 r0, q0 ; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %vec) %cmp = icmp ugt i16 %x, %min %1 = select i1 %cmp, i16 %min, i16 %x ret i16 %1 @@ -342,7 +342,7 @@ define arm_aapcs_vfpcc i32 @inverted_uminv4i32(<4 x i32> %vec, i32 %min) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.u32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vec) %cmp = icmp ugt i32 %x, %min %1 = select i1 %cmp, i32 %min, i32 %x ret i32 %1 @@ -354,7 +354,7 @@ define arm_aapcs_vfpcc signext i8 @inverted_sminv16i8(<16 x i8> %vec, i8 signext ; CHECK-NEXT: vminv.s8 r0, q0 ; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %vec) %cmp = icmp sgt i8 %x, %min %1 = select i1 %cmp, i8 %min, i8 %x ret i8 %1 @@ -366,7 +366,7 @@ define arm_aapcs_vfpcc signext i16 @inverted_sminv8i16(<8 x i16> %vec, i16 signe ; CHECK-NEXT: vminv.s16 r0, q0 ; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %vec) %cmp = icmp sgt i16 %x, %min %1 = select i1 %cmp, i16 %min, i16 %x ret i16 %1 @@ -377,7 +377,7 @@ define arm_aapcs_vfpcc i32 @inverted_sminv4i32(<4 x i32> %vec, i32 %min) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vminv.s32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec) %cmp = icmp sgt i32 %x, %min %1 = select i1 %cmp, i32 %min, i32 %x ret i32 %1 @@ -389,7 +389,7 @@ define arm_aapcs_vfpcc zeroext i8 @inverted_umaxv16i8(<16 x i8> %vec, i8 zeroext ; CHECK-NEXT: vmaxv.u8 r0, q0 ; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec) + %x = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %vec) %cmp = icmp ult i8 %x, %max %1 = select i1 %cmp, i8 %max, i8 %x ret i8 %1 @@ -401,7 +401,7 @@ define arm_aapcs_vfpcc zeroext i16 @inverted_umaxv8i16(<8 x i16> %vec, i16 zeroe ; CHECK-NEXT: vmaxv.u16 r0, q0 ; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %vec) %cmp = icmp ult i16 %x, %max %1 = select i1 %cmp, i16 %max, i16 %x ret i16 %1 @@ -412,7 +412,7 @@ define arm_aapcs_vfpcc i32 @inverted_umaxv4i32(<4 x i32> %vec, i32 %max) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.u32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vec) %cmp = icmp ult i32 %x, %max %1 = select i1 %cmp, i32 %max, i32 %x ret i32 %1 @@ -424,7 +424,7 @@ define arm_aapcs_vfpcc signext i8 @inverted_smaxv16i8(<16 x i8> %vec, i8 signext ; CHECK-NEXT: vmaxv.s8 r0, q0 ; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: bx lr - %x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec) + 
%x = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %vec) %cmp = icmp slt i8 %x, %max %1 = select i1 %cmp, i8 %max, i8 %x ret i8 %1 @@ -436,7 +436,7 @@ define arm_aapcs_vfpcc signext i16 @inverted_smaxv8i16(<8 x i16> %vec, i16 signe ; CHECK-NEXT: vmaxv.s16 r0, q0 ; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %vec) %cmp = icmp slt i16 %x, %max %1 = select i1 %cmp, i16 %max, i16 %x ret i16 %1 @@ -447,7 +447,7 @@ define arm_aapcs_vfpcc i32 @inverted_smaxv4i32(<4 x i32> %vec, i32 %max) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxv.s32 r0, q0 ; CHECK-NEXT: bx lr - %x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec) + %x = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec) %cmp = icmp slt i32 %x, %max %1 = select i1 %cmp, i32 %max, i32 %x ret i32 %1 @@ -464,7 +464,7 @@ define arm_aapcs_vfpcc signext i16 @trunc_and_sext(<8 x i16> %vec, i32 %max) #1 ; CHECK-NEXT: csel r0, r0, r1, gt ; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %vec) %xs = sext i16 %x to i32 %cmp = icmp sgt i32 %max, %xs %mt = trunc i32 %max to i16 @@ -482,7 +482,7 @@ define arm_aapcs_vfpcc signext i16 @trunc_and_zext(<8 x i16> %vec, i32 %max) #1 ; CHECK-NEXT: csel r0, r0, r1, gt ; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: bx lr - %x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec) + %x = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %vec) %xs = zext i16 %x to i32 %cmp = icmp sgt i32 %max, %xs %mt = trunc i32 %max to i16 @@ -515,7 +515,7 @@ define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) { ; CHECK-NEXT: csel r0, r5, r0, ne ; CHECK-NEXT: csel r1, r3, r1, ne ; CHECK-NEXT: pop {r4, r5, r7, pc} - %x = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec) + %x = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec) %cmp = icmp ult i64 %x, %min %1 = select i1 %cmp, i64 %x, i64 %min ret i64 %1 @@ -546,7 +546,7 @@ define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) { ; CHECK-NEXT: csel r0, r5, r0, ne ; CHECK-NEXT: csel r1, r3, r1, ne ; CHECK-NEXT: pop {r4, r5, r7, pc} - %x = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec) + %x = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec) %cmp = icmp slt i64 %x, %min %1 = select i1 %cmp, i64 %x, i64 %min ret i64 %1 @@ -577,7 +577,7 @@ define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) { ; CHECK-NEXT: csel r0, r5, r0, ne ; CHECK-NEXT: csel r1, r3, r1, ne ; CHECK-NEXT: pop {r4, r5, r7, pc} - %x = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec) + %x = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec) %cmp = icmp ugt i64 %x, %max %1 = select i1 %cmp, i64 %x, i64 %max ret i64 %1 @@ -608,40 +608,40 @@ define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) { ; CHECK-NEXT: csel r0, r5, r0, ne ; CHECK-NEXT: csel r1, r3, r1, ne ; CHECK-NEXT: pop {r4, r5, r7, pc} - %x = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec) + %x = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec) %cmp = icmp sgt i64 %x, %max %1 = select i1 %cmp, i64 %x, i64 %max ret i64 %1 } -declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) +declare i16 
@llvm.vector.reduce.umin.v8i16(<8 x i16>)
 
-declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
 
-declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
 
-declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
 
-declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
 
-declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
 
-declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
 
-declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
 
-declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
 
-declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
 
-declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
 
-declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
 
-declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
 
-declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
 
-declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)

From 498f89d1887752a207568311a9e657025ed4eeea Mon Sep 17 00:00:00 2001
From: David Green
Date: Thu, 8 Oct 2020 08:28:58 +0100
Subject: [PATCH 295/321] [LV] Collect dead induction truncates

We currently collect the ICmp and Add from an induction variable,
marking them as dead so that vplan values are not created for them.
This extends that to include any single-use trunc from the ICmp, which
allows the Add to more readily be removed too.

This can help with costing vplan nodes, as the ICmp and Add are more
reliably removed and are not double-counted.

Differential Revision: https://reviews.llvm.org/D88873
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  9 +++-
 .../ARM/tail-folding-not-allowed.ll           | 12 +++--
 .../Transforms/LoopVectorize/X86/pr36524.ll   |  3 --
 .../LoopVectorize/X86/tail_loop_folding.ll    | 44 +++++++++----------
 .../test/Transforms/LoopVectorize/followup.ll |  2 +-
 .../LoopVectorize/if-pred-stores.ll           |  8 +---
 6 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d54a890a3ce20..e542c159d346f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7080,9 +7080,16 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
   // condition will be dead after vectorization if it's only used by the
   // branch.
   auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
-  if (Cmp && Cmp->hasOneUse())
+  if (Cmp && Cmp->hasOneUse()) {
     DeadInstructions.insert(Cmp);
 
+    // An operand of the icmp is often a dead trunc, used by IndUpdate.
+    for (Value *Op : Cmp->operands()) {
+      if (isa<TruncInst>(Op) && Op->hasOneUse())
+        DeadInstructions.insert(cast<Instruction>(Op));
+    }
+  }
+
   // We create new "steps" for induction variable updates to which the original
   // induction variables map. An original update instruction will be dead if
   // all its users except the induction variable are dead.
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
index 18b130224f987..9228c6f7fdca7 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
@@ -218,11 +218,9 @@ define void @trunc_not_allowed(i32* noalias nocapture %A, i32* noalias nocapture
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
 ; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = add nuw nsw i32 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428
-; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428
+; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 431, 428
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -234,10 +232,10 @@ define void @trunc_not_allowed(i32* noalias nocapture %A, i32* noalias nocapture
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_09:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[I_09]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[I_09]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP14]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_09]]
 ; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ADD3]] = add nuw nsw i32 [[I_09]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
index 2cd24131197b9..215ddda0af430 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
@@ -12,9 +12,6 @@ define void @foo() {
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[OFFSET_IDX1:%.*]] = add i64 2, [[INDEX]]
-; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[OFFSET_IDX1]] to i32
-; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], 0
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80
diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll index 1017d79792f52..a6b1ba5c45702 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -172,50 +172,48 @@ define i32 @reduction_i32(i32* nocapture readonly %A, i32* nocapture readonly %B ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <8 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP8]], i32 4, <8 x i1> [[TMP6]], <8 x i32> undef) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP11]], i32 4, <8 x i1> [[TMP6]], <8 x i32> undef) -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD3]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP13]] = add <8 x i32> [[TMP12]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[TMP4]] to i32 -; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP13]], <8 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <8 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x i32> undef) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP10]], i32 4, <8 x i1> [[TMP5]], <8 x i32> undef) +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD3]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP12]] = add <8 x i32> [[TMP11]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP12]], <8 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 
[[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP15]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP13]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ [[SUM_1:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDXA]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDXA]], align 4 ; CHECK-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARRAYIDXB]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDXB]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP16]] ; CHECK-NEXT: [[SUM_1]] = add nuw nsw i32 [[ADD]], [[SUM_0]] ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_1_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/followup.ll b/llvm/test/Transforms/LoopVectorize/followup.ll index 3aaf2ee85d034..cc7405833fbd4 100644 --- a/llvm/test/Transforms/LoopVectorize/followup.ll +++ b/llvm/test/Transforms/LoopVectorize/followup.ll @@ -32,7 +32,7 @@ for.end: ; CHECK-LABEL: @followup( ; CHECK-LABEL: vector.body: -; CHECK: br i1 %13, label %middle.block, label %vector.body, !llvm.loop ![[LOOP_VECTOR:[0-9]+]] +; CHECK: br i1 %{{[0-9]*}}, label %middle.block, label %vector.body, !llvm.loop ![[LOOP_VECTOR:[0-9]+]] ; CHECK-LABEL: for.body: ; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop ![[LOOP_EPILOGUE:[0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 9a07c1c94b37d..5ad5457dd8028 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -249,13 +249,9 @@ define void @bug18724(i1 %cond) { ; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = add i32 [[VEC_PHI2]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI]] = select i1 
undef, i32 [[VEC_PHI]], i32 [[TMP4]] ; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP5]] -; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX6:%.*]] = add i64 undef, [[INDEX]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX6]] to i32 -; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION7:%.*]] = add i32 [[TMP6]], 0 -; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION8:%.*]] = add i32 [[TMP6]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]] ; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0 From ba268d2fb09ceabfa03a6499258f973be62d7663 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 8 Oct 2020 09:35:49 +0200 Subject: [PATCH 296/321] [clangd] Fix a typo, NFC. --- clang-tools-extra/clangd/ClangdServer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index c52ec007bbdce..7322b71e57cea 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -274,7 +274,7 @@ class ClangdServer { /// Test the validity of a rename operation. /// - /// If NewName is provided, it peforms a name validation. + /// If NewName is provided, it performs a name validation. void prepareRename(PathRef File, Position Pos, llvm::Optional NewName, const RenameOptions &RenameOpts, From a96bcfb196740b5be217f6166462ee1206530520 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 8 Oct 2020 10:00:29 +0200 Subject: [PATCH 297/321] [AST][RecoveryExpr] Support dependent cast-expr in C for error-recovery. Suppress spurious "typecheck_cond_expect_scalar_operand" diagnostic. See whole context: https://reviews.llvm.org/D85025 Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D84387 --- clang/lib/Sema/SemaCast.cpp | 11 +++++++++++ clang/test/AST/ast-dump-recovery.c | 5 +++++ clang/test/Sema/error-dependence.c | 6 +++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index d59f1880a7fff..0bd240585bd7f 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -2707,6 +2707,17 @@ void CastOperation::CheckCStyleCast() { return; } + // If the type is dependent, we won't do any other semantic analysis now. + if (Self.getASTContext().isDependenceAllowed() && + (DestType->isDependentType() || SrcExpr.get()->isTypeDependent() || + SrcExpr.get()->isValueDependent())) { + assert((DestType->containsErrors() || SrcExpr.get()->containsErrors() || + SrcExpr.get()->containsErrors()) && + "should only occur in error-recovery path."); + assert(Kind == CK_Dependent); + return; + } + // Overloads are allowed with C extensions, so we need to support them. 
if (SrcExpr.get()->getType() == Self.Context.OverloadTy) { DeclAccessPair DAP; diff --git a/clang/test/AST/ast-dump-recovery.c b/clang/test/AST/ast-dump-recovery.c index 7b2bcf27eccee..d14aedebe4903 100644 --- a/clang/test/AST/ast-dump-recovery.c +++ b/clang/test/AST/ast-dump-recovery.c @@ -81,4 +81,9 @@ void test2() { // CHECK-NEXT: |-DeclRefExpr {{.*}} 'int *' lvalue // CHECK-NEXT: `-DeclRefExpr {{.*}} 'float' lvalue (ptr > f ? ptr : f); + + // CHECK: CStyleCastExpr {{.*}} 'float' contains-errors + // CHECK-NEXT: `-RecoveryExpr {{.*}} '' + // CHECK-NEXT: `-DeclRefExpr {{.*}} 'some_func' + (float)some_func(); } diff --git a/clang/test/Sema/error-dependence.c b/clang/test/Sema/error-dependence.c index b83a79f8c4c65..41733cdba3fe7 100644 --- a/clang/test/Sema/error-dependence.c +++ b/clang/test/Sema/error-dependence.c @@ -1,11 +1,15 @@ // RUN: %clang_cc1 -fsyntax-only -verify -frecovery-ast -fno-recovery-ast-type %s -int call(int); // expected-note2 {{'call' declared here}} +int call(int); // expected-note3 {{'call' declared here}} void test1(int s) { // verify "assigning to 'int' from incompatible type ''" is // not emitted. s = call(); // expected-error {{too few arguments to function call}} + + // verify diagnostic "operand of type '' where arithmetic or + // pointer type is required" is not emitted. + (float)call(); // expected-error {{too few arguments to function call}} } void test2(int* ptr, float f) { From 380087e6c9a226b28c24de3b18e202e20d430765 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin Date: Tue, 6 Oct 2020 15:16:31 +0200 Subject: [PATCH 298/321] [AMDGPU] Add test with redundant copies to temporary stack slot produced by expandUnalignedLoad Differential Revision: https://reviews.llvm.org/D88895 --- .../AMDGPU/load-local-redundant-copies.ll | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll diff --git a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll new file mode 100644 index 0000000000000..b5f041cac54d8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll @@ -0,0 +1,184 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +; Test that checks for redundant copies to temporary stack slot produced by +; expandUnalignedLoad. 
+ +define amdgpu_vs void @test(<4 x i32> inreg %arg1, <6 x float> addrspace(3)* %arg2) { +; CHECK-LABEL: test: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s8, s4 +; CHECK-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 +; CHECK-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 +; CHECK-NEXT: s_mov_b32 s6, -1 +; CHECK-NEXT: s_mov_b32 s7, 0xe8f000 +; CHECK-NEXT: s_add_u32 s4, s4, s8 +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, 8, v0 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 12, v0 +; CHECK-NEXT: s_mov_b32 m0, -1 +; CHECK-NEXT: ds_read_b32 v1, v1 +; CHECK-NEXT: ds_read_b32 v2, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 4, v0 +; CHECK-NEXT: ds_read_b32 v3, v3 +; CHECK-NEXT: ds_read_b32 v0, v0 +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:28 +; CHECK-NEXT: buffer_store_dword v1, off, s[4:7], 0 offset:24 +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:20 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:16 +; CHECK-NEXT: s_waitcnt expcnt(1) +; CHECK-NEXT: buffer_load_dword v3, off, s[4:7], 0 offset:28 +; CHECK-NEXT: buffer_load_dword v2, off, s[4:7], 0 offset:24 +; CHECK-NEXT: buffer_load_dword v1, off, s[4:7], 0 offset:20 +; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: buffer_load_dword v0, off, s[4:7], 0 offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: exp mrt0 off, off, off, off +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_FLOAT] idxen +; CHECK-NEXT: s_endpgm + call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 0, float undef, float undef, float undef, float undef, i1 immarg false, i1 immarg false) + %var1 = load <6 x float>, <6 x float> addrspace(3)* %arg2, align 4 + %var2 = shufflevector <6 x float> %var1, <6 x float> undef, <4 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %var2, <4 x i32> %arg1, i32 0, i32 0, i32 0, i32 immarg 126, i32 immarg 0) + ret void +} + +define amdgpu_vs void @test_2(<4 x i32> inreg %arg1, i32 %arg2, i32 inreg %arg3, <8 x float> addrspace(3)* %arg4) { +; CHECK-LABEL: test_2: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; CHECK-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; CHECK-NEXT: s_mov_b32 s10, -1 +; CHECK-NEXT: s_mov_b32 s11, 0xe8f000 +; CHECK-NEXT: s_add_u32 s8, s8, s5 +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 24, v1 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 28, v1 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, 16, v1 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 20, v1 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 8, v1 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 12, v1 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, 4, v1 +; CHECK-NEXT: s_mov_b32 m0, -1 +; CHECK-NEXT: ds_read_b32 v4, v2 +; CHECK-NEXT: ds_read_b32 v5, v3 +; CHECK-NEXT: ds_read_b32 v2, v6 +; CHECK-NEXT: ds_read_b32 v3, v7 +; CHECK-NEXT: ds_read_b32 v8, v8 +; CHECK-NEXT: ds_read_b32 v9, v9 +; CHECK-NEXT: ds_read_b32 v7, v10 +; CHECK-NEXT: ds_read_b32 v6, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(6) +; CHECK-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:28 +; CHECK-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:24 +; CHECK-NEXT: s_waitcnt lgkmcnt(4) +; CHECK-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:20 +; CHECK-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:16 +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:44 +; CHECK-NEXT: 
buffer_store_dword v8, off, s[8:11], 0 offset:40 +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:36 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:32 +; CHECK-NEXT: tbuffer_store_format_xyzw v[6:9], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen glc slc +; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:16 glc slc +; CHECK-NEXT: s_endpgm + %load = load <8 x float>, <8 x float> addrspace(3)* %arg4, align 4 + %vec1 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %vec1, <4 x i32> %arg1, i32 %arg2, i32 0, i32 %arg3, i32 immarg 77, i32 immarg 3) + %vec2 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %vec2, <4 x i32> %arg1, i32 %arg2, i32 16, i32 %arg3, i32 immarg 77, i32 immarg 3) + ret void +} + +define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, <4 x i32> inreg %arg3, i32 %arg4, <6 x float> addrspace(3)* %arg5, <6 x float> addrspace(3)* %arg6) { +; CHECK-LABEL: test_3: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; CHECK-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; CHECK-NEXT: s_mov_b32 s10, -1 +; CHECK-NEXT: s_mov_b32 s11, 0xe8f000 +; CHECK-NEXT: s_add_u32 s8, s8, s6 +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: s_mov_b32 s7, s5 +; CHECK-NEXT: s_mov_b32 s6, s4 +; CHECK-NEXT: s_mov_b32 s5, s3 +; CHECK-NEXT: s_mov_b32 s4, s2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, 8, v1 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 12, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 4, v1 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 16, v1 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 20, v1 +; CHECK-NEXT: v_mov_b32_e32 v9, s0 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, 8, v2 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, 12, v2 +; CHECK-NEXT: v_add_i32_e32 v12, vcc, 4, v2 +; CHECK-NEXT: v_add_i32_e32 v13, vcc, 16, v2 +; CHECK-NEXT: v_add_i32_e32 v14, vcc, 20, v2 +; CHECK-NEXT: s_mov_b32 m0, -1 +; CHECK-NEXT: ds_read_b32 v5, v0 +; CHECK-NEXT: ds_read_b32 v6, v3 +; CHECK-NEXT: ds_read_b32 v4, v4 +; CHECK-NEXT: ds_read_b32 v8, v8 +; CHECK-NEXT: ds_read_b32 v7, v7 +; CHECK-NEXT: ds_read_b32 v3, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(4) +; CHECK-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:44 +; CHECK-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:40 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:36 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:32 +; CHECK-NEXT: tbuffer_store_format_xyzw v[3:6], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:264 glc slc +; CHECK-NEXT: tbuffer_store_format_xy v[7:8], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:280 glc slc +; CHECK-NEXT: ds_read_b32 v0, v10 +; CHECK-NEXT: ds_read_b32 v1, v11 +; CHECK-NEXT: s_waitcnt expcnt(1) +; CHECK-NEXT: ds_read_b32 v3, v12 +; CHECK-NEXT: ds_read_b32 v4, v13 +; CHECK-NEXT: ds_read_b32 v2, v2 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:28 +; CHECK-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:24 +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:20 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: 
buffer_store_dword v2, off, s[8:11], 0 offset:16 +; CHECK-NEXT: s_waitcnt expcnt(1) +; CHECK-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:28 +; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:24 +; CHECK-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:20 +; CHECK-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:16 +; CHECK-NEXT: ds_read_b32 v5, v14 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: exp mrt0 off, off, off, off +; CHECK-NEXT: tbuffer_store_format_xyzw v[0:3], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:240 glc slc +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: tbuffer_store_format_xy v[4:5], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc +; CHECK-NEXT: s_endpgm + %load1 = load <6 x float>, <6 x float> addrspace(3)* %arg5, align 4 + %vec11 = shufflevector <6 x float> %load1, <6 x float> undef, <4 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %vec11, <4 x i32> %arg3, i32 %arg1, i32 264, i32 %arg2, i32 immarg 77, i32 immarg 3) + %vec12 = shufflevector <6 x float> %load1, <6 x float> undef, <2 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v2f32(<2 x float> %vec12, <4 x i32> %arg3, i32 %arg1, i32 280, i32 %arg2, i32 immarg 64, i32 immarg 3) + + call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 0, float undef, float undef, float undef, float undef, i1 immarg false, i1 immarg false) + + %load2 = load <6 x float>, <6 x float> addrspace(3)* %arg6, align 4 + %vec21 = shufflevector <6 x float> %load2, <6 x float> undef, <4 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %vec21, <4 x i32> %arg3, i32 %arg1, i32 240, i32 %arg2, i32 immarg 77, i32 immarg 3) + %vec22 = shufflevector <6 x float> %load2, <6 x float> undef, <2 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v2f32(<2 x float> %vec22, <4 x i32> %arg3, i32 %arg1, i32 256, i32 %arg2, i32 immarg 64, i32 immarg 3) + + ret void +} + +declare void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) +declare void @llvm.amdgcn.struct.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) +declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) From 7c88d13fd1c1a864124d96224996c62192d431f2 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin Date: Tue, 6 Oct 2020 15:20:29 +0200 Subject: [PATCH 299/321] [AMDGPU] Prefer SplitVectorLoad/Store over expandUnalignedLoad/Store ExpandUnalignedLoad/Store can sometimes produce unnecessary copies to temporary stack slot. We should prefer splitting vectors if possible. 
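For example, a <6 x float> load from LDS with only 4-byte alignment, the
shape exercised by the tests updated below (a minimal sketch, not one of
the committed tests): splitting it produces plain dword loads that stay in
registers, while expandUnalignedLoad would round-trip the value through a
temporary stack slot.

  define <6 x float> @underaligned_load(<6 x float> addrspace(3)* %p) {
    ; align 4 is below the natural alignment of <6 x float>.
    %v = load <6 x float>, <6 x float> addrspace(3)* %p, align 4
    ret <6 x float> %v
  }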
Differential Revision: https://reviews.llvm.org/D88882 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 32 +++-- .../AMDGPU/load-local-redundant-copies.ll | 128 +++++------------- 2 files changed, 54 insertions(+), 106 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f1a5d8d66c4bd..4212f68cd2f1e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8014,13 +8014,6 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType().getVectorElementType() == MVT::i32 && "Custom lowering for non-i32 vectors hasn't been implemented."); - if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), - MemVT, *Load->getMemOperand())) { - SDValue Ops[2]; - std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); - return DAG.getMergeValues(Ops, DL); - } - unsigned Alignment = Load->getAlignment(); unsigned AS = Load->getAddressSpace(); if (Subtarget->hasLDSMisalignedBug() && @@ -8132,6 +8125,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return SplitVectorLoad(Op, DAG); } } + + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + MemVT, *Load->getMemOperand())) { + SDValue Ops[2]; + std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); + return DAG.getMergeValues(Ops, DL); + } + return SDValue(); } @@ -8537,11 +8538,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { assert(VT.isVector() && Store->getValue().getValueType().getScalarType() == MVT::i32); - if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), - VT, *Store->getMemOperand())) { - return expandUnalignedStore(Store, DAG); - } - unsigned AS = Store->getAddressSpace(); if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS && @@ -8566,6 +8562,11 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // v3 stores not supported on SI. 
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores()) return SplitVectorStore(Op, DAG); + + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + VT, *Store->getMemOperand())) + return expandUnalignedStore(Store, DAG); + return SDValue(); } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) { switch (Subtarget->getMaxPrivateElementSize()) { @@ -8605,6 +8606,13 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return SplitVectorStore(Op, DAG); } + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + VT, *Store->getMemOperand())) { + if (VT.isVector()) + return SplitVectorStore(Op, DAG); + return expandUnalignedStore(Store, DAG); + } + return SDValue(); } else { llvm_unreachable("unhandled address space"); diff --git a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll index b5f041cac54d8..2de1423e5eea5 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll @@ -7,35 +7,15 @@ define amdgpu_vs void @test(<4 x i32> inreg %arg1, <6 x float> addrspace(3)* %arg2) { ; CHECK-LABEL: test: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s8, s4 -; CHECK-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 -; CHECK-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 -; CHECK-NEXT: s_mov_b32 s6, -1 -; CHECK-NEXT: s_mov_b32 s7, 0xe8f000 -; CHECK-NEXT: s_add_u32 s4, s4, s8 -; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 12, v0 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, 8, v0 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, 12, v0 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 4, v0 ; CHECK-NEXT: s_mov_b32 m0, -1 -; CHECK-NEXT: ds_read_b32 v1, v1 -; CHECK-NEXT: ds_read_b32 v2, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, 4, v0 +; CHECK-NEXT: ds_read_b32 v2, v1 +; CHECK-NEXT: ds_read_b32 v1, v4 ; CHECK-NEXT: ds_read_b32 v3, v3 ; CHECK-NEXT: ds_read_b32 v0, v0 -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:28 -; CHECK-NEXT: buffer_store_dword v1, off, s[4:7], 0 offset:24 -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:20 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:16 -; CHECK-NEXT: s_waitcnt expcnt(1) -; CHECK-NEXT: buffer_load_dword v3, off, s[4:7], 0 offset:28 -; CHECK-NEXT: buffer_load_dword v2, off, s[4:7], 0 offset:24 -; CHECK-NEXT: buffer_load_dword v1, off, s[4:7], 0 offset:20 -; CHECK-NEXT: s_waitcnt expcnt(0) -; CHECK-NEXT: buffer_load_dword v0, off, s[4:7], 0 offset:16 -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: exp mrt0 off, off, off, off ; CHECK-NEXT: v_mov_b32_e32 v4, 0 ; CHECK-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_FLOAT] idxen @@ -50,42 +30,25 @@ define amdgpu_vs void @test(<4 x i32> inreg %arg1, <6 x float> addrspace(3)* %ar define amdgpu_vs void @test_2(<4 x i32> inreg %arg1, i32 %arg2, i32 inreg %arg3, <8 x float> addrspace(3)* %arg4) { ; CHECK-LABEL: test_2: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; CHECK-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; CHECK-NEXT: s_mov_b32 s10, -1 -; CHECK-NEXT: s_mov_b32 s11, 0xe8f000 -; CHECK-NEXT: s_add_u32 s8, s8, s5 -; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 28, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 24, v1 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, 28, v1 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 20, v1 ; 
CHECK-NEXT: v_add_i32_e32 v6, vcc, 16, v1 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, 20, v1 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 12, v1 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 8, v1 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, 12, v1 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 4, v1 ; CHECK-NEXT: s_mov_b32 m0, -1 ; CHECK-NEXT: ds_read_b32 v4, v2 -; CHECK-NEXT: ds_read_b32 v5, v3 +; CHECK-NEXT: ds_read_b32 v3, v3 ; CHECK-NEXT: ds_read_b32 v2, v6 -; CHECK-NEXT: ds_read_b32 v3, v7 +; CHECK-NEXT: ds_read_b32 v9, v7 ; CHECK-NEXT: ds_read_b32 v8, v8 -; CHECK-NEXT: ds_read_b32 v9, v9 ; CHECK-NEXT: ds_read_b32 v7, v10 ; CHECK-NEXT: ds_read_b32 v6, v1 -; CHECK-NEXT: s_waitcnt lgkmcnt(6) -; CHECK-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:28 -; CHECK-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:24 -; CHECK-NEXT: s_waitcnt lgkmcnt(4) -; CHECK-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:20 -; CHECK-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:16 -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:44 -; CHECK-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:40 +; CHECK-NEXT: ds_read_b32 v5, v5 ; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:36 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:32 ; CHECK-NEXT: tbuffer_store_format_xyzw v[6:9], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen glc slc +; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:16 glc slc ; CHECK-NEXT: s_endpgm %load = load <8 x float>, <8 x float> addrspace(3)* %arg4, align 4 @@ -99,65 +62,42 @@ define amdgpu_vs void @test_2(<4 x i32> inreg %arg1, i32 %arg2, i32 inreg %arg3, define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, <4 x i32> inreg %arg3, i32 %arg4, <6 x float> addrspace(3)* %arg5, <6 x float> addrspace(3)* %arg6) { ; CHECK-LABEL: test_3: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; CHECK-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; CHECK-NEXT: s_mov_b32 s10, -1 -; CHECK-NEXT: s_mov_b32 s11, 0xe8f000 -; CHECK-NEXT: s_add_u32 s8, s8, s6 -; CHECK-NEXT: s_addc_u32 s9, s9, 0 ; CHECK-NEXT: s_mov_b32 s7, s5 ; CHECK-NEXT: s_mov_b32 s6, s4 ; CHECK-NEXT: s_mov_b32 s5, s3 ; CHECK-NEXT: s_mov_b32 s4, s2 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, 8, v1 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, 12, v1 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, 4, v1 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, 16, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, 16, v1 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, 12, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 8, v1 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 4, v1 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 20, v1 ; CHECK-NEXT: v_mov_b32_e32 v9, s0 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, 8, v2 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, 16, v2 ; CHECK-NEXT: v_add_i32_e32 v11, vcc, 12, v2 -; CHECK-NEXT: v_add_i32_e32 v12, vcc, 4, v2 -; CHECK-NEXT: v_add_i32_e32 v13, vcc, 16, v2 -; CHECK-NEXT: v_add_i32_e32 v14, vcc, 20, v2 +; CHECK-NEXT: v_add_i32_e32 v12, vcc, 8, v2 ; CHECK-NEXT: s_mov_b32 m0, -1 -; CHECK-NEXT: ds_read_b32 v5, v0 -; CHECK-NEXT: ds_read_b32 v6, v3 -; CHECK-NEXT: ds_read_b32 v4, v4 -; CHECK-NEXT: ds_read_b32 v8, v8 -; CHECK-NEXT: ds_read_b32 v7, v7 ; CHECK-NEXT: ds_read_b32 v3, v1 -; CHECK-NEXT: s_waitcnt lgkmcnt(4) -; CHECK-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:44 -; CHECK-NEXT: buffer_store_dword v5, 
off, s[8:11], 0 offset:40 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:36 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:32 +; CHECK-NEXT: ds_read_b32 v5, v4 +; CHECK-NEXT: ds_read_b32 v4, v7 +; CHECK-NEXT: ds_read_b32 v1, v8 +; CHECK-NEXT: ds_read_b32 v6, v6 +; CHECK-NEXT: ds_read_b32 v0, v0 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 4, v2 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 20, v2 +; CHECK-NEXT: s_waitcnt lgkmcnt(1) ; CHECK-NEXT: tbuffer_store_format_xyzw v[3:6], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:264 glc slc -; CHECK-NEXT: tbuffer_store_format_xy v[7:8], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:280 glc slc -; CHECK-NEXT: ds_read_b32 v0, v10 -; CHECK-NEXT: ds_read_b32 v1, v11 -; CHECK-NEXT: s_waitcnt expcnt(1) -; CHECK-NEXT: ds_read_b32 v3, v12 -; CHECK-NEXT: ds_read_b32 v4, v13 -; CHECK-NEXT: ds_read_b32 v2, v2 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:28 -; CHECK-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:24 -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:20 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:16 -; CHECK-NEXT: s_waitcnt expcnt(1) -; CHECK-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:28 +; CHECK-NEXT: tbuffer_store_format_xy v[0:1], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:280 glc slc ; CHECK-NEXT: s_waitcnt expcnt(0) -; CHECK-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:24 -; CHECK-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:20 -; CHECK-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:16 -; CHECK-NEXT: ds_read_b32 v5, v14 -; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: ds_read_b32 v0, v2 +; CHECK-NEXT: ds_read_b32 v2, v12 +; CHECK-NEXT: ds_read_b32 v1, v7 +; CHECK-NEXT: ds_read_b32 v5, v8 +; CHECK-NEXT: ds_read_b32 v3, v11 +; CHECK-NEXT: ds_read_b32 v4, v10 +; CHECK-NEXT: s_waitcnt lgkmcnt(5) ; CHECK-NEXT: exp mrt0 off, off, off, off +; CHECK-NEXT: s_waitcnt lgkmcnt(1) ; CHECK-NEXT: tbuffer_store_format_xyzw v[0:3], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:240 glc slc ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: tbuffer_store_format_xy v[4:5], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc From e7cf723051cd4638cf5d2c407b756312292e7c18 Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Thu, 8 Oct 2020 07:38:41 +0000 Subject: [PATCH 300/321] [mlir] Added strides check to rank reducing subview verification Added missing strides check to verification method of rank reducing subview which enforces strides specification for the resulting type. 
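As a minimal illustration of the check being added (a standalone sketch, not the
Ops.cpp code below; the enum name and vector-based signature are assumptions for
exposition): the strides of the original type are filtered through the
rank-reduction keep-mask and must then match the strides of the reduced type,
position for position.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

enum class StrideCheck { Success, StrideMismatch };

// Hypothetical helper mirroring the new verification step: every dimension
// kept by the rank reduction must carry the same stride in both types.
StrideCheck checkReducedStrides(const std::vector<int64_t> &originalStrides,
                                const std::vector<bool> &keepMask,
                                const std::vector<int64_t> &reducedStrides) {
  size_t reducedIdx = 0;
  for (size_t i = 0; i < originalStrides.size(); ++i) {
    if (!keepMask[i])
      continue; // A dropped unit dimension; its stride is irrelevant.
    if (reducedIdx >= reducedStrides.size() ||
        originalStrides[i] != reducedStrides[reducedIdx++])
      return StrideCheck::StrideMismatch;
  }
  return StrideCheck::Success;
}
```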
Differential Revision: https://reviews.llvm.org/D88879 --- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 115 ++++++++++++++++-------- mlir/test/IR/core-ops.mlir | 9 +- mlir/test/IR/invalid-ops.mlir | 38 +++++++- 3 files changed, 118 insertions(+), 44 deletions(-) diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index f2823c564ccef..f445a0cce242c 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -2823,19 +2823,30 @@ static SmallVector extractFromI64ArrayAttr(Attribute attr) { })); } +enum SubViewVerificationResult { + Success, + RankTooLarge, + SizeMismatch, + StrideMismatch, + ElemTypeMismatch, + MemSpaceMismatch, + AffineMapMismatch +}; + /// Checks if `original` Type type can be rank reduced to `reduced` type. /// This function is slight variant of `is subsequence` algorithm where /// not matching dimension must be 1. -static bool isRankReducedType(Type originalType, Type reducedType) { +static SubViewVerificationResult isRankReducedType(Type originalType, + Type reducedType) { if (originalType == reducedType) - return true; + return SubViewVerificationResult::Success; if (!originalType.isa() && !originalType.isa()) - return true; + return SubViewVerificationResult::Success; if (originalType.isa() && !reducedType.isa()) - return true; + return SubViewVerificationResult::Success; if (originalType.isa() && !reducedType.isa()) - return true; + return SubViewVerificationResult::Success; ShapedType originalShapedType = originalType.cast(); ShapedType reducedShapedType = reducedType.cast(); @@ -2846,7 +2857,7 @@ static bool isRankReducedType(Type originalType, Type reducedType) { unsigned originalRank = originalShape.size(), reducedRank = reducedShape.size(); if (reducedRank > originalRank) - return false; + return SubViewVerificationResult::RankTooLarge; unsigned reducedIdx = 0; SmallVector keepMask(originalRank); @@ -2858,41 +2869,78 @@ static bool isRankReducedType(Type originalType, Type reducedType) { reducedIdx++; // 1 is the only non-matching allowed. else if (originalShape[originalIdx] != 1) - return false; + return SubViewVerificationResult::SizeMismatch; } // Must match the reduced rank. if (reducedIdx != reducedRank) - return false; + return SubViewVerificationResult::SizeMismatch; // We are done for the tensor case. if (originalType.isa()) - return true; + return SubViewVerificationResult::Success; // Strided layout logic is relevant for MemRefType only. MemRefType original = originalType.cast(); MemRefType reduced = reducedType.cast(); MLIRContext *c = original.getContext(); - int64_t originalOffset, symCounter = 0, dimCounter = 0; - SmallVector originalStrides; + int64_t originalOffset, reducedOffset; + SmallVector originalStrides, reducedStrides, keepStrides; getStridesAndOffset(original, originalStrides, originalOffset); - auto getSymbolOrConstant = [&](int64_t offset) { - return offset == ShapedType::kDynamicStrideOrOffset - ? getAffineSymbolExpr(symCounter++, c) - : getAffineConstantExpr(offset, c); - }; - - AffineExpr expr = getSymbolOrConstant(originalOffset); - for (unsigned i = 0, e = originalStrides.size(); i < e; i++) { - if (keepMask[i]) - expr = expr + getSymbolOrConstant(originalStrides[i]) * - getAffineDimExpr(dimCounter++, c); + getStridesAndOffset(reduced, reducedStrides, reducedOffset); + + // Filter strides based on the mask and check that they are the same + // as reduced ones. 
+ reducedIdx = 0; + for (unsigned originalIdx = 0; originalIdx < originalRank; ++originalIdx) { + if (keepMask[originalIdx]) { + if (originalStrides[originalIdx] != reducedStrides[reducedIdx++]) + return SubViewVerificationResult::StrideMismatch; + keepStrides.push_back(originalStrides[originalIdx]); + } } - auto reducedMap = AffineMap::get(dimCounter, symCounter, expr, c); - return original.getElementType() == reduced.getElementType() && - original.getMemorySpace() == reduced.getMemorySpace() && - (reduced.getAffineMaps().empty() || - reducedMap == reduced.getAffineMaps().front()); + if (original.getElementType() != reduced.getElementType()) + return SubViewVerificationResult::ElemTypeMismatch; + + if (original.getMemorySpace() != reduced.getMemorySpace()) + return SubViewVerificationResult::MemSpaceMismatch; + + auto reducedMap = makeStridedLinearLayoutMap(keepStrides, originalOffset, c); + if (!reduced.getAffineMaps().empty() && + reducedMap != reduced.getAffineMaps().front()) + return SubViewVerificationResult::AffineMapMismatch; + + return SubViewVerificationResult::Success; +} + +template +static LogicalResult produceSubViewErrorMsg(SubViewVerificationResult result, + OpTy op, Type expectedType) { + auto memrefType = expectedType.cast(); + switch (result) { + case SubViewVerificationResult::Success: + return success(); + case SubViewVerificationResult::RankTooLarge: + return op.emitError("expected result rank to be smaller or equal to ") + << "the source rank."; + case SubViewVerificationResult::SizeMismatch: + return op.emitError("expected result type to be ") + << expectedType + << " or a rank-reduced version. (mismatch of result sizes)"; + case SubViewVerificationResult::StrideMismatch: + return op.emitError("expected result type to be ") + << expectedType + << " or a rank-reduced version. (mismatch of result strides)"; + case SubViewVerificationResult::ElemTypeMismatch: + return op.emitError("expected result element type to be ") + << memrefType.getElementType(); + case SubViewVerificationResult::MemSpaceMismatch: + return op.emitError("expected result and source memory spaces to match."); + case SubViewVerificationResult::AffineMapMismatch: + return op.emitError("expected result type to be ") + << expectedType + << " or a rank-reduced version. 
(mismatch of result affine map)"; + } } template @@ -2937,11 +2985,9 @@ static LogicalResult verify(SubViewOp op) { baseType, extractFromI64ArrayAttr(op.static_offsets()), extractFromI64ArrayAttr(op.static_sizes()), extractFromI64ArrayAttr(op.static_strides())); - if (!isRankReducedType(expectedType, subViewType)) - return op.emitError("expected result type to be ") - << expectedType << " or a rank-reduced version."; - return success(); + auto result = isRankReducedType(expectedType, subViewType); + return produceSubViewErrorMsg(result, op, expectedType); } raw_ostream &mlir::operator<<(raw_ostream &os, Range &range) { @@ -3352,11 +3398,8 @@ static LogicalResult verify(SubTensorOp op) { op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()), extractFromI64ArrayAttr(op.static_sizes()), extractFromI64ArrayAttr(op.static_strides())); - if (!isRankReducedType(expectedType, op.getType())) - return op.emitError("expected result type to be ") - << expectedType << " or a rank-reduced version."; - - return success(); + auto result = isRankReducedType(expectedType, op.getType()); + return produceSubViewErrorMsg(result, op, expectedType); } void SubTensorOp::getCanonicalizationPatterns(OwningRewritePatternList &results, diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index 2590dc0105c4e..219c3bc84d570 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -21,6 +21,7 @@ // CHECK-DAG: #[[$SUBVIEW_MAP5:map[0-9]+]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1 * 2)> // CHECK-DAG: #[[$SUBVIEW_MAP6:map[0-9]+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0 * 36 + d1 * 36 + d2 * 4 + d3 * 4 + d4)> // CHECK-DAG: #[[$SUBVIEW_MAP7:map[0-9]+]] = affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4 + d4 * s5 + d5 * s6)> +// CHECK-DAG: #[[$SUBVIEW_MAP8:map[0-9]+]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)> // CHECK-LABEL: func @func_with_ops // CHECK-SAME: %[[ARG:.*]]: f32 @@ -811,11 +812,11 @@ func @memref_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %15 = alloc(%arg1, %arg2)[%c0, %c1, %arg1, %arg0, %arg0, %arg2, %arg2] : memref<1x?x5x1x?x1xf32, affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6] -> (s0 + s1 * d0 + s2 * d1 + s3 * d2 + s4 * d3 + s5 * d4 + s6 * d5)>> // CHECK: subview %15[0, 0, 0, 0, 0, 0] [1, %arg1, 5, 1, %arg2, 1] [1, 1, 1, 1, 1, 1] : - // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref - %16 = subview %15[0, 0, 0, 0, 0, 0][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref + // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref + %16 = subview %15[0, 0, 0, 0, 0, 0][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref // CHECK: subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1] [1, %arg1, 5, 1, %arg2, 1] [1, 1, 1, 1, 1, 1] : - // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref - %17 = subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref + // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref + %17 = subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref %18 = alloc() : 
memref<1x8xf32> // CHECK: subview %18[0, 0] [1, 8] [1, 1] : memref<1x8xf32> to memref<8xf32> diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 7356c07577dba..b59353aa2f7c5 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -1011,7 +1011,7 @@ func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<8x16x4xf32> - // expected-error@+1 {{expected result type to be 'memref (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>'}} + // expected-error@+1 {{expected result type to be 'memref (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>' or a rank-reduced version. (mismatch of result strides)}} %1 = subview %0[%arg0, %arg1, %arg2][%arg0, %arg1, %arg2][%arg0, %arg1, %arg2] : memref<8x16x4xf32> to memref @@ -1020,9 +1020,31 @@ func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { // ----- +func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { + %0 = alloc() : memref<8x16x4xf32> + // expected-error@+1 {{expected result element type to be 'f32'}} + %1 = subview %0[0, 0, 0][8, 16, 4][1, 1, 1] + : memref<8x16x4xf32> to + memref<8x16x4xi32> + return +} + +// ----- + +func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { + %0 = alloc() : memref<8x16x4xf32> + // expected-error@+1 {{expected result rank to be smaller or equal to the source rank.}} + %1 = subview %0[0, 0, 0][8, 16, 4][1, 1, 1] + : memref<8x16x4xf32> to + memref<8x16x4x3xi32> + return +} + +// ----- + func @invalid_rank_reducing_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<8x16x4xf32> - // expected-error@+1 {{expected result type to be 'memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>>'}} + // expected-error@+1 {{expected result type to be 'memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>>' or a rank-reduced version. (mismatch of result sizes)}} %1 = subview %0[0, 0, 0][8, 16, 4][1, 1, 1] : memref<8x16x4xf32> to memref<16x4xf32> return @@ -1030,6 +1052,14 @@ func @invalid_rank_reducing_subview(%arg0 : index, %arg1 : index, %arg2 : index) // ----- +func @invalid_rank_reducing_subview(%arg0 : memref, %arg1 : index, %arg2 : index) { + // expected-error@+1 {{expected result type to be 'memref (d0 * s1 + s0 + d1)>>' or a rank-reduced version. (mismatch of result strides)}} + %0 = subview %arg0[0, %arg1][%arg2, 1][1, 1] : memref to memref + return +} + +// ----- + func @invalid_memref_cast(%arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]>) { // expected-error@+1{{operand type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 16 + d2)>>' and result type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 128 + d1 * 32 + d2 * 2)>>' are cast incompatible}} %0 = memref_cast %arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]> to memref<12x4x16xf32, offset:0, strides:[128, 32, 2]> @@ -1259,7 +1289,7 @@ func @imaginary_part_from_incompatible_complex_type(%cplx: complex) { // ----- func @subtensor_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) { - // expected-error @+1 {{expected result type to be 'tensor<4x4x4xf32>'}} + // expected-error @+1 {{expected result type to be 'tensor<4x4x4xf32>' or a rank-reduced version. 
(mismatch of result sizes)}} %0 = subtensor %t[0, 2, 0][4, 4, 4][1, 1, 1] : tensor<8x16x4xf32> to tensor @@ -1269,7 +1299,7 @@ func @subtensor_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) { // ----- func @subtensor_wrong_static_type(%t: tensor<8x16x4xf32>, %idx : index) { - // expected-error @+1 {{expected result type to be 'tensor'}} + // expected-error @+1 {{expected result type to be 'tensor' or a rank-reduced version. (mismatch of result sizes)}} %0 = subtensor %t[0, 0, 0][%idx, 3, %idx][1, 1, 1] : tensor<8x16x4xf32> to tensor<4x4x4xf32> From 8e9622f96120842852f37bfaeea738b4b4ed5cb0 Mon Sep 17 00:00:00 2001 From: Pierre Gousseau Date: Thu, 8 Oct 2020 09:45:59 +0100 Subject: [PATCH 301/321] [cmake] Fix cmake warning in standalone compiler-rt builds. ``` cd compiler-rt/build cmake -G Ninja ../ -DCOMPILER_RT_STANDALONE_BUILD=ON -DLLVM_CONFIG_PATH=<...>llvm-project/build/bin/llvm-config -DCOMPILER_RT_INCLUDE_TESTS=ON ``` ``` -- check-shadowcallstack does nothing. Traceback (most recent call last): File "", line 22, in IndexError: list index out of range -- Configuring done -- Generating done ``` Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D88957 --- llvm/cmake/modules/AddLLVM.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index e57abea427530..4e9b1f6c23322 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -1494,6 +1494,7 @@ def relpath(p):\n if os.path.splitdrive(p)[0] != os.path.splitdrive(base)[0]: return p\n if haslink(p) or haslink(base): return p\n return os.path.relpath(p, base)\n +if len(sys.argv) < 3: sys.exit(0)\n sys.stdout.write(';'.join(relpath(p) for p in sys.argv[2].split(';')))" ${basedir} ${pathlist_escaped} From 568035ac3955790aee2a5dbc2b1f4074c76bb4d7 Mon Sep 17 00:00:00 2001 From: Luqman Aden Date: Thu, 8 Oct 2020 01:43:50 -0700 Subject: [PATCH 302/321] [llvm-readobj] Add --coff-tls-directory flag to print TLS Directory & test. Akin to dumpbin's /TLS option, this will print out the TLS directory, if present, in the image. Example output: ``` > llvm-readobj --coff-tls-directory test.exe File: test.exe Format: COFF-x86-64 Arch: x86_64 AddressSize: 64bit TLSDirectory { StartAddressOfRawData: 0x140004000 EndAddressOfRawData: 0x140004040 AddressOfIndex: 0x140002000 AddressOfCallBacks: 0x0 SizeOfZeroFill: 0x0 Characteristics [ (0x0) ] } ``` Reviewed By: jhenderson, grimar Differential Revision: https://reviews.llvm.org/D88635 --- llvm/docs/CommandGuide/llvm-readobj.rst | 4 + llvm/include/llvm/BinaryFormat/COFF.h | 1 + llvm/include/llvm/Object/COFF.h | 10 ++ llvm/lib/Object/COFFObjectFile.cpp | 41 ++++- .../llvm-readobj/COFF/tls-directory.test | 162 ++++++++++++++++++ llvm/tools/llvm-readobj/COFFDumper.cpp | 27 +++ llvm/tools/llvm-readobj/ObjDumper.h | 1 + llvm/tools/llvm-readobj/llvm-readobj.cpp | 6 + 8 files changed, 250 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llvm-readobj/COFF/tls-directory.test diff --git a/llvm/docs/CommandGuide/llvm-readobj.rst b/llvm/docs/CommandGuide/llvm-readobj.rst index 9b1b5ba92bc07..ba5511bb765a6 100644 --- a/llvm/docs/CommandGuide/llvm-readobj.rst +++ b/llvm/docs/CommandGuide/llvm-readobj.rst @@ -286,6 +286,10 @@ The following options are implemented only for the PE/COFF file format. Display the debug directory. +.. option:: --coff-tls-directory + + Display the TLS directory. + .. option:: --coff-directives Display the .drectve section. 
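For context before the header and parser changes below: the TLS directory is
located through the TLS_TABLE entry of the optional header's data directory,
and its size must match the fixed 24-byte (PE32) or 40-byte (PE32+) layout
before the bytes are reinterpreted. A simplified, self-contained sketch of that
check, using hand-rolled structs rather than llvm::object's COFF types:

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative stand-ins for the PE structures (field layout per the PE/COFF
// spec); these are not the llvm::object definitions.
struct DataDirectory {
  uint32_t RelativeVirtualAddress;
  uint32_t Size;
};

struct TLSDirectory64 {
  uint64_t StartAddressOfRawData;
  uint64_t EndAddressOfRawData;
  uint64_t AddressOfIndex;
  uint64_t AddressOfCallBacks;
  uint32_t SizeOfZeroFill;
  uint32_t Characteristics;
};
static_assert(sizeof(TLSDirectory64) == 40, "PE32+ TLS directory is 40 bytes");

// Mirrors the shape of the validation: a missing or null data-directory entry
// is not an error; a present entry must have exactly the expected size.
bool validateTLSEntry(const DataDirectory *Entry, bool Is64) {
  if (!Entry || Entry->RelativeVirtualAddress == 0)
    return true; // No TLS directory; nothing to dump.
  uint32_t Expected = Is64 ? 40u : 24u;
  if (Entry->Size != Expected) {
    std::fprintf(stderr,
                 "TLS Directory size (%u) is not the expected size (%u).\n",
                 static_cast<unsigned>(Entry->Size),
                 static_cast<unsigned>(Expected));
    return false;
  }
  return true;
}
```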
diff --git a/llvm/include/llvm/BinaryFormat/COFF.h b/llvm/include/llvm/BinaryFormat/COFF.h index 1919d7f0dece9..716d649f7c51c 100644 --- a/llvm/include/llvm/BinaryFormat/COFF.h +++ b/llvm/include/llvm/BinaryFormat/COFF.h @@ -311,6 +311,7 @@ enum SectionCharacteristics : uint32_t { IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000, IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000, IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000, + IMAGE_SCN_ALIGN_MASK = 0x00F00000, IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, diff --git a/llvm/include/llvm/Object/COFF.h b/llvm/include/llvm/Object/COFF.h index 8aef00a8809dc..505aab8bff5b3 100644 --- a/llvm/include/llvm/Object/COFF.h +++ b/llvm/include/llvm/Object/COFF.h @@ -786,6 +786,8 @@ class COFFObjectFile : public ObjectFile { const coff_base_reloc_block_header *BaseRelocEnd; const debug_directory *DebugDirectoryBegin; const debug_directory *DebugDirectoryEnd; + const coff_tls_directory32 *TLSDirectory32; + const coff_tls_directory64 *TLSDirectory64; // Either coff_load_configuration32 or coff_load_configuration64. const void *LoadConfig = nullptr; @@ -805,6 +807,7 @@ class COFFObjectFile : public ObjectFile { Error initExportTablePtr(); Error initBaseRelocPtr(); Error initDebugDirectoryPtr(); + Error initTLSDirectoryPtr(); Error initLoadConfigPtr(); public: @@ -976,6 +979,13 @@ class COFFObjectFile : public ObjectFile { return make_range(debug_directory_begin(), debug_directory_end()); } + const coff_tls_directory32 *getTLSDirectory32() const { + return TLSDirectory32; + } + const coff_tls_directory64 *getTLSDirectory64() const { + return TLSDirectory64; + } + const dos_header *getDOSHeader() const { if (!PE32Header && !PE32PlusHeader) return nullptr; diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp index c26d7721b3fe9..cd10e67af239e 100644 --- a/llvm/lib/Object/COFFObjectFile.cpp +++ b/llvm/lib/Object/COFFObjectFile.cpp @@ -649,6 +649,38 @@ Error COFFObjectFile::initDebugDirectoryPtr() { return Error::success(); } +Error COFFObjectFile::initTLSDirectoryPtr() { + // Get the RVA of the TLS directory. Do nothing if it does not exist. + const data_directory *DataEntry = getDataDirectory(COFF::TLS_TABLE); + if (!DataEntry) + return Error::success(); + + // Do nothing if the RVA is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return Error::success(); + + uint64_t DirSize = + is64() ? sizeof(coff_tls_directory64) : sizeof(coff_tls_directory32); + + // Check that the size is correct. + if (DataEntry->Size != DirSize) + return createStringError( + object_error::parse_failed, + "TLS Directory size (%u) is not the expected size (%u).", + static_cast(DataEntry->Size), DirSize); + + uintptr_t IntPtr = 0; + if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return E; + + if (is64()) + TLSDirectory64 = reinterpret_cast(IntPtr); + else + TLSDirectory32 = reinterpret_cast(IntPtr); + + return Error::success(); +} + Error COFFObjectFile::initLoadConfigPtr() { // Get the RVA of the debug directory. Do nothing if it does not exist. 
const data_directory *DataEntry = getDataDirectory(COFF::LOAD_CONFIG_TABLE); @@ -682,7 +714,8 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object) ImportDirectory(nullptr), DelayImportDirectory(nullptr), NumberOfDelayImportDirectory(0), ExportDirectory(nullptr), BaseRelocHeader(nullptr), BaseRelocEnd(nullptr), - DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr) {} + DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr), + TLSDirectory32(nullptr), TLSDirectory64(nullptr) {} Error COFFObjectFile::initialize() { // Check that we at least have enough room for a header. @@ -809,10 +842,14 @@ Error COFFObjectFile::initialize() { if (Error E = initBaseRelocPtr()) return E; - // Initialize the pointer to the export table. + // Initialize the pointer to the debug directory. if (Error E = initDebugDirectoryPtr()) return E; + // Initialize the pointer to the TLS directory. + if (Error E = initTLSDirectoryPtr()) + return E; + if (Error E = initLoadConfigPtr()) return E; diff --git a/llvm/test/tools/llvm-readobj/COFF/tls-directory.test b/llvm/test/tools/llvm-readobj/COFF/tls-directory.test new file mode 100644 index 0000000000000..d553130e0a017 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/COFF/tls-directory.test @@ -0,0 +1,162 @@ +## Tests for the --coff-tls-directory flag. + +## Test that the output of --coff-tls-directory works on x86. +## The binary created from this yaml definition is such that .rdata contains +## only the IMAGE_TLS_DIRECTORY structure and hence we should have that +## TlsTable.RelativeVirtualAddress == .rdata section VirtualAddress. +## Also note that the .rdata section VirtualSize == sizeof(coff_tls_directory32) == sizeof(IMAGE_TLS_DIRECTORY32) == 24 + +# RUN: yaml2obj %s --docnum=1 -o %t.32.exe -DTLSRVA=10000 -DTLSSIZE=24 +# RUN: llvm-readobj --coff-tls-directory %t.32.exe | FileCheck %s --check-prefix I386 + +# I386: Arch: i386 +# I386-NEXT: AddressSize: 32bit +# I386-NEXT: TLSDirectory { +# I386-NEXT: StartAddressOfRawData: 0x404000 +# I386-NEXT: EndAddressOfRawData: 0x404008 +# I386-NEXT: AddressOfIndex: 0x402000 +# I386-NEXT: AddressOfCallBacks: 0x0 +# I386-NEXT: SizeOfZeroFill: 0x0 +# I386-NEXT: Characteristics [ (0x300000) +# I386-NEXT: IMAGE_SCN_ALIGN_4BYTES (0x300000) +# I386-NEXT: ] +# I386-NEXT: } + + +## Test that the output of --coff-tls-directory errors on malformed input. +## On x86, the TLS directory should be 24 bytes. +## This test has a truncated TLS directory. + +# RUN: yaml2obj %s --docnum=1 -o %t.wrong-size.32.exe -DTLSRVA=10000 -DTLSSIZE=10 +# RUN: not llvm-readobj --coff-tls-directory %t.wrong-size.32.exe 2>&1 | FileCheck %s --check-prefix I386-WRONG-SIZE-ERR + +# I386-WRONG-SIZE-ERR: error: '{{.*}}': TLS Directory size (10) is not the expected size (24). 
+ +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 0 + ImageBase: 0 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 0 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 0 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [] + SizeOfStackReserve: 0 + SizeOfStackCommit: 0 + SizeOfHeapReserve: 0 + SizeOfHeapCommit: 0 + TlsTable: + RelativeVirtualAddress: [[TLSRVA]] + Size: [[TLSSIZE]] +header: + Machine: IMAGE_FILE_MACHINE_I386 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .rdata + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + VirtualAddress: 10000 + VirtualSize: 24 + SectionData: '004040000840400000204000000000000000000000003000' +symbols: [] + + +## Test that the output of --coff-tls-directory works on x86_64. +## The binary created from this yaml definition is such that .rdata contains +## only the IMAGE_TLS_DIRECTORY structure and hence we should have that +## TlsTable.RelativeVirtualAddress == .rdata section VirtualAddress. +## Also note that the .rdata section VirtualSize == sizeof(coff_tls_directory64) == sizeof(IMAGE_TLS_DIRECTORY64) == 40 + +# RUN: yaml2obj %s --docnum=2 -o %t.64.exe -DTLSRVA=10000 -DTLSSIZE=40 +# RUN: llvm-readobj --coff-tls-directory %t.64.exe | FileCheck %s --check-prefix X86-64 + +# X86-64: Arch: x86_64 +# X86-64-NEXT: AddressSize: 64bit +# X86-64-NEXT: TLSDirectory { +# X86-64-NEXT: StartAddressOfRawData: 0x140004000 +# X86-64-NEXT: EndAddressOfRawData: 0x140004008 +# X86-64-NEXT: AddressOfIndex: 0x140002000 +# X86-64-NEXT: AddressOfCallBacks: 0x0 +# X86-64-NEXT: SizeOfZeroFill: 0x0 +# X86-64-NEXT: Characteristics [ (0x300000) +# X86-64-NEXT: IMAGE_SCN_ALIGN_4BYTES (0x300000) +# X86-64-NEXT: ] +# X86-64-NEXT: } + + +## Test that the output of --coff-tls-directory errors on malformed input. + +## On x86-64, the TLS directory should be 40 bytes. +## This test has an erroneously lengthened TLS directory. + +# RUN: yaml2obj %s --docnum=2 -o %t.wrong-size.64.exe -DTLSRVA=10000 -DTLSSIZE=80 +# RUN: not llvm-readobj --coff-tls-directory %t.wrong-size.64.exe 2>&1 | FileCheck %s --check-prefix X86-64-WRONG-SIZE-ERR + +# X86-64-WRONG-SIZE-ERR: error: '{{.*}}': TLS Directory size (80) is not the expected size (40). + + +## This test has a correct TLS Directory size but the RVA is invalid. 
+ +# RUN: yaml2obj %s --docnum=2 -o %t.bad-tls-rva.exe -DTLSRVA=999999 -DTLSSIZE=40 +# RUN: not llvm-readobj --coff-tls-directory %t.bad-tls-rva.exe 2>&1 | FileCheck %s --check-prefix BAD-TLS-RVA-ERR + +# BAD-TLS-RVA-ERR: error: '{{.*}}': Invalid data was encountered while parsing the file + +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 0 + ImageBase: 0 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 0 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 0 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [] + SizeOfStackReserve: 0 + SizeOfStackCommit: 0 + SizeOfHeapReserve: 0 + SizeOfHeapCommit: 0 + TlsTable: + RelativeVirtualAddress: [[TLSRVA]] + Size: [[TLSSIZE]] +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_LARGE_ADDRESS_AWARE ] +sections: + - Name: .rdata + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + VirtualAddress: 10000 + VirtualSize: 40 + SectionData: '00400040010000000840004001000000002000400100000000000000000000000000000000003000' +symbols: [] + + +## Test that --coff-tls-directory doesn't output anything if there's no TLS directory. + +## Case 1: TlsTable.RelativeVirtualAddress/Size = 0. + +# RUN: yaml2obj %s --docnum=2 -o %t.no-tls1.exe -DTLSRVA=0 -DTLSSIZE=0 +# RUN: llvm-readobj --coff-tls-directory %t.no-tls1.exe | FileCheck %s --check-prefix NO-TLS + +## Case 2: There's no TlsTable listed in the COFF header. + +# RUN: yaml2obj %s --docnum=3 -o %t.no-tls2.exe +# RUN: llvm-readobj --coff-tls-directory %t.no-tls2.exe | FileCheck %s --check-prefix NO-TLS + +# NO-TLS: TLSDirectory { +# NO-TLS-NEXT: } + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_LARGE_ADDRESS_AWARE ] +sections: [] +symbols: [] diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index 22e27b3e5a29e..f59bfd8b7cb6e 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -89,6 +89,7 @@ class COFFDumper : public ObjDumper { void printCOFFDirectives() override; void printCOFFBaseReloc() override; void printCOFFDebugDirectory() override; + void printCOFFTLSDirectory() override; void printCOFFResources() override; void printCOFFLoadConfig() override; void printCodeViewDebugInfo() override; @@ -116,6 +117,8 @@ class COFFDumper : public ObjDumper { void printBaseOfDataField(const pe32plus_header *Hdr); template void printCOFFLoadConfig(const T *Conf, LoadConfigTables &Tables); + template + void printCOFFTLSDirectory(const coff_tls_directory *TlsTable); typedef void (*PrintExtraCB)(raw_ostream &, const uint8_t *); void printRVATable(uint64_t TableVA, uint64_t Count, uint64_t EntrySize, PrintExtraCB PrintExtra = 0); @@ -2018,3 +2021,27 @@ void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer, Writer.flush(); } } + +void COFFDumper::printCOFFTLSDirectory() { + if (Obj->is64()) + printCOFFTLSDirectory(Obj->getTLSDirectory64()); + else + printCOFFTLSDirectory(Obj->getTLSDirectory32()); +} + +template +void COFFDumper::printCOFFTLSDirectory( + const coff_tls_directory *TlsTable) { + DictScope D(W, "TLSDirectory"); + if (!TlsTable) + return; + + W.printHex("StartAddressOfRawData", TlsTable->StartAddressOfRawData); + W.printHex("EndAddressOfRawData", TlsTable->EndAddressOfRawData); + W.printHex("AddressOfIndex", TlsTable->AddressOfIndex); + 
W.printHex("AddressOfCallBacks", TlsTable->AddressOfCallBacks); + W.printHex("SizeOfZeroFill", TlsTable->SizeOfZeroFill); + W.printFlags("Characteristics", TlsTable->Characteristics, + makeArrayRef(ImageSectionCharacteristics), + COFF::SectionCharacteristics(COFF::IMAGE_SCN_ALIGN_MASK)); +} diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h index 9e45062ccda8d..943299a121fc5 100644 --- a/llvm/tools/llvm-readobj/ObjDumper.h +++ b/llvm/tools/llvm-readobj/ObjDumper.h @@ -80,6 +80,7 @@ class ObjDumper { virtual void printCOFFDirectives() { } virtual void printCOFFBaseReloc() { } virtual void printCOFFDebugDirectory() { } + virtual void printCOFFTLSDirectory() {} virtual void printCOFFResources() {} virtual void printCOFFLoadConfig() { } virtual void printCodeViewDebugInfo() { } diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index 173ee3a7f140d..1546ce7926a40 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -272,6 +272,10 @@ namespace opts { COFFDebugDirectory("coff-debug-directory", cl::desc("Display the PE/COFF debug directory")); + // --coff-tls-directory + cl::opt COFFTLSDirectory("coff-tls-directory", + cl::desc("Display the PE/COFF TLS directory")); + // --coff-resources cl::opt COFFResources("coff-resources", cl::desc("Display the PE/COFF .rsrc section")); @@ -533,6 +537,8 @@ static void dumpObject(const ObjectFile &Obj, ScopedPrinter &Writer, Dumper->printCOFFBaseReloc(); if (opts::COFFDebugDirectory) Dumper->printCOFFDebugDirectory(); + if (opts::COFFTLSDirectory) + Dumper->printCOFFTLSDirectory(); if (opts::COFFResources) Dumper->printCOFFResources(); if (opts::COFFLoadConfig) From 9824d5c83844495ac00c04dc1789c3529a68c1ac Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Thu, 8 Oct 2020 16:26:14 +0700 Subject: [PATCH 303/321] [Test] Add test showing that we fail to eliminate implied exit conditions --- .../IndVarSimplify/eliminate-comparison.ll | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll index 085d9ee20e1c0..00e655942c363 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll @@ -942,4 +942,111 @@ leave: ret void } +declare i1 @cond_func() + +define i32 @func_25(i32 %start) { +; CHECK-LABEL: @func_25( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[C1:%.*]] = icmp ne i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[C1]], label [[CHECKED_1:%.*]], label [[FAIL:%.*]] +; CHECK: checked.1: +; CHECK-NEXT: [[C2:%.*]] = icmp ne i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[C2]], label [[CHECKED_2:%.*]], label [[FAIL]] +; CHECK: checked.2: +; CHECK-NEXT: [[C3:%.*]] = icmp ne i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[C3]], label [[BACKEDGE]], label [[FAIL]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 758394 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond_func() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: fail: +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA1]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [%start, %entry], [%iv.next, 
%backedge] + %c1 = icmp ne i32 %iv, 0 + br i1 %c1, label %checked.1, label %fail + +checked.1: + %c2 = icmp ne i32 %iv, 0 + br i1 %c2, label %checked.2, label %fail + +checked.2: + %c3 = icmp ne i32 %iv, 0 + br i1 %c3, label %backedge, label %fail + +backedge: + %iv.next = add i32 %iv, 758394 + %loop.cond = call i1 @cond_func() + br i1 %loop.cond, label %loop, label %exit + +fail: + unreachable + +exit: + ret i32 %iv +} + +define i32 @func_26(i32 %start) { +; CHECK-LABEL: @func_26( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[C1]], label [[CHECKED_1:%.*]], label [[FAIL:%.*]] +; CHECK: checked.1: +; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C2]], label [[CHECKED_2:%.*]], label [[FAIL]] +; CHECK: checked.2: +; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[IV]], 2 +; CHECK-NEXT: br i1 [[C3]], label [[BACKEDGE]], label [[FAIL]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 758394 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond_func() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: fail: +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA1]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [%start, %entry], [%iv.next, %backedge] + %c1 = icmp slt i32 %iv, 0 + br i1 %c1, label %checked.1, label %fail + +checked.1: + %c2 = icmp slt i32 %iv, 1 + br i1 %c2, label %checked.2, label %fail + +checked.2: + %c3 = icmp slt i32 %iv, 2 + br i1 %c3, label %backedge, label %fail + +backedge: + %iv.next = add i32 %iv, 758394 + %loop.cond = call i1 @cond_func() + br i1 %loop.cond, label %loop, label %exit + +fail: + unreachable + +exit: + ret i32 %iv +} + + !0 = !{i32 0, i32 2147483647} From 3578945004e322e45fdc47ca804f65628ea325a7 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 5 Oct 2020 12:29:18 +0300 Subject: [PATCH 304/321] [llvm-readobj][test] - Improve testing in hash-table.test This makes tests stricter and adds cases to verify what we do when: 1) there is no `DT_HASH` tag (but there is a `SHT_HASH` section in sections header) 2) the sh_entsize of the `SHT_HASH` section is not equal to 4. Differential revision: https://reviews.llvm.org/D88812 --- .../tools/llvm-readobj/ELF/hash-table.test | 48 ++++++++++++++----- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/hash-table.test b/llvm/test/tools/llvm-readobj/ELF/hash-table.test index 1102d848f03e4..b8d44e3cdf719 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hash-table.test +++ b/llvm/test/tools/llvm-readobj/ELF/hash-table.test @@ -45,9 +45,13 @@ ProgramHeaders: ## Check we can dump the SHT_HASH section even when an object ## does not have the section header table. 
-# RUN: yaml2obj --docnum=2 %s -o %t.noshdr -# RUN: llvm-readobj --hash-table %t.noshdr | FileCheck %s --check-prefix=NOSHDR -# RUN: llvm-readelf --hash-table %t.noshdr | FileCheck %s --check-prefix=NOSHDR +# RUN: yaml2obj --docnum=2 -DNOHEADERS=true %s -o %t.noshdr +# RUN: llvm-readobj --hash-table %t.noshdr 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.noshdr --check-prefix=NOSHDR --implicit-check-not=warning: +# RUN: llvm-readelf --hash-table %t.noshdr 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.noshdr --check-prefix=NOSHDR --implicit-check-not=warning: + +# NOSHDR: warning: '[[FILE]]': string table was not found # NOSHDR: HashTable { # NOSHDR-NEXT: Num Buckets: 1 @@ -58,37 +62,57 @@ ProgramHeaders: --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN -## We simulate no section header table by -## overriding the ELF header properties. - EShOff: 0x0 - EShNum: 0x0 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN Sections: - Name: .hash Type: SHT_HASH Flags: [ SHF_ALLOC ] Bucket: [ 0 ] Chain: [ 1 ] + EntSize: [[ENTSIZE=4]] - Name: .dynamic Type: SHT_DYNAMIC Flags: [ SHF_ALLOC ] Entries: - - Tag: DT_HASH + - Tag: [[DYNTAG=DT_HASH]] Value: 0x0 - Tag: DT_NULL Value: 0x0 +SectionHeaderTable: + NoHeaders: [[NOHEADERS=false]] ProgramHeaders: - Type: PT_LOAD Sections: - Section: .hash - Section: .dynamic - Type: PT_DYNAMIC - VAddr: 0x1010 + VAddr: 0x10 Sections: - Section: .dynamic +## Document we don't report a warning when the value of the sh_entsize field of the SHT_HASH section is not 4. + +# RUN: yaml2obj --docnum=2 -DENTSIZE=0xff %s -o %t.ent.size +# RUN: llvm-readobj --hash-table %t.ent.size 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.ent.size --check-prefix=NOSHDR --implicit-check-not=warning: +# RUN: llvm-readelf --hash-table %t.ent.size 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.ent.size --check-prefix=NOSHDR --implicit-check-not=warning: + +## Document we need the DT_HASH dynamic tag to locate the hash table. + +# RUN: yaml2obj --docnum=2 -DDYNTAG=DT_NULL %s -o %t.no.dyntag +# RUN: llvm-readobj --hash-table %t.no.dyntag 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.no.dyntag --check-prefix=NODYNTAG --implicit-check-not=warning: +# RUN: llvm-readelf --hash-table %t.no.dyntag 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.no.dyntag --check-prefix=NODYNTAG --implicit-check-not=warning: + +# NODYNTAG: warning: '[[FILE]]': string table was not found + +# NODYNTAG: HashTable { +# NODYNTAG-NEXT: } + ## Each SHT_HASH section starts with two 32-bit fields: nbucket and nchain. ## Check we report an error when a DT_HASH value points to data that has size less than 8 bytes. From cc83dc191c1ef04907ccd4308cc6aa5bf6fcd3c6 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Wed, 7 Oct 2020 16:17:35 +0200 Subject: [PATCH 305/321] Import llvm::StringSwitch into mlir namespace. 
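The change is purely ergonomic: StringSwitch is LLVM's fluent string-to-value
matcher, used pervasively in the files touched below, and the alias lets MLIR
code drop the llvm:: qualifier. A representative use (an illustrative example,
not code from this patch):

```cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class Dim { X, Y, Z, Invalid };

// With the alias added to mlir/Support/LLVM.h, MLIR code can now spell this
// simply as StringSwitch<Dim>(name)...
Dim parseDim(llvm::StringRef name) {
  return llvm::StringSwitch<Dim>(name)
      .Case("x", Dim::X)
      .Case("y", Dim::Y)
      .Case("z", Dim::Z)
      .Default(Dim::Invalid);
}
```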
Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D88971 --- mlir/include/mlir/Support/LLVM.h | 4 ++++ mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h | 2 +- mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp | 2 +- mlir/lib/Dialect/PDL/IR/PDL.cpp | 2 +- mlir/lib/ExecutionEngine/JitRunner.cpp | 2 +- mlir/lib/IR/SymbolTable.cpp | 2 +- mlir/lib/Parser/Lexer.cpp | 2 +- mlir/lib/TableGen/Format.cpp | 2 +- mlir/lib/TableGen/Predicate.cpp | 2 +- mlir/test/lib/Dialect/Test/TestDialect.cpp | 2 +- mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp | 2 +- mlir/tools/mlir-tblgen/OpFormatGen.cpp | 4 ++-- 12 files changed, 16 insertions(+), 12 deletions(-) diff --git a/mlir/include/mlir/Support/LLVM.h b/mlir/include/mlir/Support/LLVM.h index 17e020442eb48..e8595ae29ed74 100644 --- a/mlir/include/mlir/Support/LLVM.h +++ b/mlir/include/mlir/Support/LLVM.h @@ -60,6 +60,8 @@ template class SmallVectorImpl; template class StringSet; +template +class StringSwitch; template class TinyPtrVector; template @@ -111,6 +113,8 @@ using llvm::SmallPtrSet; using llvm::SmallPtrSetImpl; using llvm::SmallVector; using llvm::SmallVectorImpl; +template +using StringSwitch = llvm::StringSwitch; using llvm::TinyPtrVector; template using TypeSwitch = llvm::TypeSwitch; diff --git a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h index 93381054dd213..f4b7cedeb0e12 100644 --- a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h +++ b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h @@ -27,7 +27,7 @@ struct GPUIndexIntrinsicOpLowering : public ConvertToLLVMPattern { unsigned indexBitwidth; static dimension dimensionToIndex(Op op) { - return llvm::StringSwitch(op.dimension()) + return StringSwitch(op.dimension()) .Case("x", X) .Case("y", Y) .Case("z", Z) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp index aa611d76a67ab..574d0aa8c37f0 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp @@ -440,7 +440,7 @@ static LLVMType parseTypeImpl(DialectAsmParser &parser, if (failed(parser.parseKeyword(&key))) return LLVMType(); - return llvm::StringSwitch>(key) + return StringSwitch>(key) .Case("void", [&] { return LLVMVoidType::get(ctx); }) .Case("half", [&] { return LLVMHalfType::get(ctx); }) .Case("bfloat", [&] { return LLVMBFloatType::get(ctx); }) diff --git a/mlir/lib/Dialect/PDL/IR/PDL.cpp b/mlir/lib/Dialect/PDL/IR/PDL.cpp index a0b9c969becf6..ba1eb7b995749 100644 --- a/mlir/lib/Dialect/PDL/IR/PDL.cpp +++ b/mlir/lib/Dialect/PDL/IR/PDL.cpp @@ -34,7 +34,7 @@ Type PDLDialect::parseType(DialectAsmParser &parser) const { return Type(); Builder &builder = parser.getBuilder(); - Type result = llvm::StringSwitch(keyword) + Type result = StringSwitch(keyword) .Case("attribute", builder.getType()) .Case("operation", builder.getType()) .Case("type", builder.getType()) diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp index 2b18adb373471..7d141e90edda3 100644 --- a/mlir/lib/ExecutionEngine/JitRunner.cpp +++ b/mlir/lib/ExecutionEngine/JitRunner.cpp @@ -291,7 +291,7 @@ int mlir::JitRunnerMain( Error (*)(Options &, ModuleOp, StringRef, std::function); auto compileAndExecuteFn = - llvm::StringSwitch(options.mainFuncType.getValue()) + StringSwitch(options.mainFuncType.getValue()) .Case("i32", compileAndExecuteSingleReturnFunction) .Case("i64", compileAndExecuteSingleReturnFunction) 
.Case("f32", compileAndExecuteSingleReturnFunction) diff --git a/mlir/lib/IR/SymbolTable.cpp b/mlir/lib/IR/SymbolTable.cpp index b064d83b5faad..e18e691f8cc80 100644 --- a/mlir/lib/IR/SymbolTable.cpp +++ b/mlir/lib/IR/SymbolTable.cpp @@ -166,7 +166,7 @@ SymbolTable::Visibility SymbolTable::getSymbolVisibility(Operation *symbol) { return Visibility::Public; // Otherwise, switch on the string value. - return llvm::StringSwitch(vis.getValue()) + return StringSwitch(vis.getValue()) .Case("private", Visibility::Private) .Case("nested", Visibility::Nested) .Case("public", Visibility::Public); diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp index 9a3418eaf8327..ee31ff0cf9e4c 100644 --- a/mlir/lib/Parser/Lexer.cpp +++ b/mlir/lib/Parser/Lexer.cpp @@ -212,7 +212,7 @@ Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) { isAllDigit(spelling.drop_front(2)))) return Token(Token::inttype, spelling); - Token::Kind kind = llvm::StringSwitch(spelling) + Token::Kind kind = StringSwitch(spelling) #define TOK_KEYWORD(SPELLING) .Case(#SPELLING, Token::kw_##SPELLING) #include "TokenKinds.def" .Default(Token::bare_identifier); diff --git a/mlir/lib/TableGen/Format.cpp b/mlir/lib/TableGen/Format.cpp index 12735875c1c19..7d17a0aef3f97 100644 --- a/mlir/lib/TableGen/Format.cpp +++ b/mlir/lib/TableGen/Format.cpp @@ -60,7 +60,7 @@ Optional FmtContext::getSubstFor(StringRef placeholder) const { } FmtContext::PHKind FmtContext::getPlaceHolderKind(StringRef str) { - return llvm::StringSwitch(str) + return StringSwitch(str) .Case("_builder", FmtContext::PHKind::Builder) .Case("_op", FmtContext::PHKind::Op) .Case("_self", FmtContext::PHKind::Self) diff --git a/mlir/lib/TableGen/Predicate.cpp b/mlir/lib/TableGen/Predicate.cpp index 8927296af223b..a37847f0d4893 100644 --- a/mlir/lib/TableGen/Predicate.cpp +++ b/mlir/lib/TableGen/Predicate.cpp @@ -119,7 +119,7 @@ static PredCombinerKind getPredCombinerKind(const Pred &pred) { return PredCombinerKind::Leaf; const auto &combinedPred = static_cast(pred); - return llvm::StringSwitch( + return StringSwitch( combinedPred.getCombinerDef()->getName()) .Case("PredCombinerAnd", PredCombinerKind::And) .Case("PredCombinerOr", PredCombinerKind::Or) diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp index c84a7717abe78..4ca89bced5eb9 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -685,7 +685,7 @@ void SideEffectOp::getEffects( // Get the specific memory effect. MemoryEffects::Effect *effect = - llvm::StringSwitch( + StringSwitch( effectElement.get("effect").cast().getValue()) .Case("allocate", MemoryEffects::Allocate::get()) .Case("free", MemoryEffects::Free::get()) diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp index 4fe3cd1ee174b..64424b4ac3d2f 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp @@ -288,7 +288,7 @@ Token Lexer::lexIdentifier(const char *tokStart) { // Check to see if this identifier is a keyword. 
StringRef str(tokStart, curPtr - tokStart); - Token::Kind kind = llvm::StringSwitch(str) + Token::Kind kind = StringSwitch(str) .Case("def", Token::Kind::kw_def) .Case("ods_def", Token::Kind::kw_ods_def) .Case("floordiv", Token::Kind::kw_floordiv) diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 336c9111677b4..9b8f249232401 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -719,7 +719,7 @@ static void genLiteralParser(StringRef value, OpMethodBody &body) { body << "Keyword(\"" << value << "\")"; return; } - body << (StringRef)llvm::StringSwitch(value) + body << (StringRef)StringSwitch(value) .Case("->", "Arrow()") .Case(":", "Colon()") .Case(",", "Comma()") @@ -1936,7 +1936,7 @@ Token FormatLexer::lexIdentifier(const char *tokStart) { // Check to see if this identifier is a keyword. StringRef str(tokStart, curPtr - tokStart); Token::Kind kind = - llvm::StringSwitch(str) + StringSwitch(str) .Case("attr-dict", Token::kw_attr_dict) .Case("attr-dict-with-keyword", Token::kw_attr_dict_w_keyword) .Case("custom", Token::kw_custom) From aa47962cc9493cd79ca78954e721ed02479729c7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 8 Oct 2020 10:53:32 +0100 Subject: [PATCH 306/321] [InstCombine] canNarrowShiftAmt - replace custom Constant matching with m_SpecificInt_ICMP The existing code ignores undef values which matches m_SpecificInt_ICMP, although m_SpecificInt_ICMP returns false for an all-undef constant, I've added test coverage at rGfe0197e194a64f9 to show that undef folding should already have dealt with that case. --- .../InstCombine/InstCombineAndOrXor.cpp | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index edb2dc8881c7b..c13362fa3136c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1701,27 +1701,8 @@ static Instruction *foldOrToXor(BinaryOperator &I, /// Return true if a constant shift amount is always less than the specified /// bit-width. If not, the shift could create poison in the narrower type. static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) { - if (auto *ScalarC = dyn_cast(C)) - return ScalarC->getZExtValue() < BitWidth; - - if (C->getType()->isVectorTy()) { - // Check each element of a constant vector. - unsigned NumElts = cast(C->getType())->getNumElements(); - for (unsigned i = 0; i != NumElts; ++i) { - Constant *Elt = C->getAggregateElement(i); - if (!Elt) - return false; - if (isa(Elt)) - continue; - auto *CI = dyn_cast(Elt); - if (!CI || CI->getZExtValue() >= BitWidth) - return false; - } - return true; - } - - // The constant is a constant expression or unknown. - return false; + APInt Threshold(C->getType()->getScalarSizeInBits(), BitWidth); + return match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold)); } /// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and From c1fd4305b68500c754a7ce6a86fe297c36e21d3b Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Thu, 8 Oct 2020 11:07:36 +0200 Subject: [PATCH 307/321] [mlir] Add basic support for dynamic tensor results in TensorToBuffers.cpp. The simplest case is when the indexing maps are DimIds in every component. This covers cwise ops. 
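Concretely: when every result of the output indexing map is a bare dimension
expression, each dynamic dimension of the result tensor equals the size of the
loop whose dimension id appears in that position, so the result buffer can be
allocated up front. A rough sketch of that mapping, with plain integers
standing in for AffineMap results and loop ranges (names are hypothetical):

```cpp
#include <cstdint>
#include <vector>

constexpr int64_t kDynamicSize = -1;

// resultMap[i] holds the loop-dimension id produced in result position i,
// which is exactly the DimId-only case this patch handles.
std::vector<int64_t> inferResultShape(const std::vector<int64_t> &resultMap,
                                      const std::vector<int64_t> &staticShape,
                                      const std::vector<int64_t> &loopSizes) {
  std::vector<int64_t> shape = staticShape;
  for (size_t i = 0; i < shape.size(); ++i)
    if (shape[i] == kDynamicSize)
      shape[i] = loopSizes[resultMap[i]]; // size of loop d_i
  return shape;
}
```

For example, with resultMap = {0, 1}, staticShape = {-1, 4}, and
loopSizes = {8, 4}, the inferred shape is {8, 4}.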
Also: * Expose populateConvertLinalgOnTensorsToBuffersPatterns in Transforms.h * Expose emitLoopRanges in Transforms.h Differential Revision: https://reviews.llvm.org/D88781 --- .../Dialect/Linalg/Transforms/Transforms.h | 19 ++ mlir/lib/Dialect/Linalg/Transforms/Loops.cpp | 135 ++++++----- .../Linalg/Transforms/TensorsToBuffers.cpp | 213 ++++++++++++------ .../Dialect/Linalg/tensors-to-buffers.mlir | 75 ++++-- 4 files changed, 288 insertions(+), 154 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 2e566c941894f..395db396dadca 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -16,6 +16,9 @@ #include "llvm/ADT/SmallBitVector.h" namespace mlir { + +class BufferAssignmentTypeConverter; + namespace linalg { struct LinalgFusionOptions; @@ -45,6 +48,12 @@ void populateConvVectorizationPatterns( MLIRContext *context, SmallVectorImpl &patterns, ArrayRef tileSizes); +/// Populates the given list with patterns to convert Linalg operations on +/// tensors to buffers. +void populateConvertLinalgOnTensorsToBuffersPatterns( + MLIRContext *context, BufferAssignmentTypeConverter *converter, + OwningRewritePatternList *patterns); + /// Performs standalone tiling of a single LinalgOp by `tileSizes`. /// and permute the loop nest according to `interchangeVector` /// The permutation is expressed as a list of integers that specify @@ -246,6 +255,16 @@ Optional promoteSubViews(OpBuilder &b, LinalgOp op, LinalgPromotionOptions options, OperationFolder *folder = nullptr); +/// Creates a number of ranges equal to the number of dimensions in the `map`. +/// The returned ranges correspond to the loop ranges, in the proper order, for +/// which new loops will be created. +/// The function supports only maps that are invertible and have results of type +/// DimExpr or (DimExpr + DimExpr - SymbolExpr floordiv ConstExpr). +/// It expects a non-inverted, concatenated map and last values in +/// allViewSizes will be applied to the symbols in the map if it contains any. +SmallVector emitLoopRanges(OpBuilder &b, Location loc, AffineMap map, + ValueRange viewSizes); + /// Emit a suitable vector form for a Linalg op with fully static shape. void vectorizeLinalgOp(OpBuilder &builder, Operation *op); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index 9e96c8cdc6919..b95469d8a9554 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -58,77 +58,6 @@ static SmallVector permuteIvs(ArrayRef ivs, : SmallVector(ivs.begin(), ivs.end()); } -/// Creates a number of ranges equal to the number of dimensions in the `map`. -/// The returned ranges correspond to the loop ranges, in the proper order, for -/// which new loops will be created. -/// The function supports only maps that are invertible and have results of type -/// DimExpr or (DimExpr + DimExpr - SymbolExpr floordiv ConstExpr). -/// It expects a non-inverted, concatenated map and last values in -/// allViewSizes will be applied to the symbols in the map if it contains any. 
-static SmallVector emitLoopRanges(OpBuilder &b, Location loc, - AffineMap map, - ValueRange viewSizes) { - unsigned numDims = map.getNumDims(), numRes = map.getNumResults(); - unsigned numSym = map.getNumSymbols(); - assert(viewSizes.size() == numRes + numSym && - "viewSizes must contain sizes of all views and values for symbols"); - SmallVector res(numDims); - for (unsigned idx = 0; idx < numRes; ++idx) { - auto result = map.getResult(idx); - if (auto d = result.dyn_cast()) { - if (res[d.getPosition()].offset) - continue; - res[d.getPosition()] = - Range{std_constant_index(0), viewSizes[idx], std_constant_index(1)}; - } - - // If the access pattern is of form (m, n)[s] -> (m + n - s floordiv 2), - // then the bounds are: - // (s floordiv 2) <= m <= (size(m) + s floordiv 2 - s + 1). - // where size(n) is applied to the symbol s. - // This is done statically now. - if (auto binOp = result.dyn_cast()) { - auto lhs = binOp.getLHS().dyn_cast(); - auto rhs = binOp.getRHS().dyn_cast(); - if (!lhs || !rhs || binOp.getKind() != AffineExprKind::Add || - lhs.getKind() != AffineExprKind::Add || - rhs.getKind() != mlir::AffineExprKind::Mul) - continue; - - auto m = lhs.getLHS().dyn_cast(); - auto n = lhs.getRHS().dyn_cast(); - auto fDiv = rhs.getLHS().dyn_cast(); - auto minusOne = rhs.getRHS().dyn_cast(); - if (!m || !n || !fDiv || !minusOne || - fDiv.getKind() != AffineExprKind::FloorDiv || - fDiv.getLHS().getKind() != AffineExprKind::SymbolId || - fDiv.getRHS().getKind() != AffineExprKind::Constant) - continue; - - auto s = fDiv.getLHS().dyn_cast(); - if (minusOne.getValue() != -1) - continue; - - int mPos = m.getPosition(); - AffineExpr one = getAffineConstantExpr(1, s.getContext()); - AffineExpr sizeOfM = getAffineSymbolExpr(numSym, s.getContext()); - // Construction of upper bound (size(m) + s floordiv 2 - s + 1). - AffineExpr upperOffsetExpr = sizeOfM + fDiv + one - s; - AffineMap fromMap = AffineMap::get(numDims, numSym + 1, fDiv); - AffineMap toMap = AffineMap::get(numDims, numSym + 1, upperOffsetExpr); - SmallVector values(viewSizes.begin(), - viewSizes.begin() + numDims); - values.insert(values.end(), viewSizes.begin() + numRes, viewSizes.end()); - values.push_back(viewSizes[mPos]); - // Construction of the lower bound (s floordiv 2). - Value from = applyMapToValues(b, loc, fromMap, values).front(); - Value to = applyMapToValues(b, loc, toMap, values).front(); - res[mPos] = Range{from, to, std_constant_index(1)}; - } - } - return res; -} - template static void inlineRegionAndEmitStore(OpType op, ArrayRef indexedValues, ArrayRef> indexing, @@ -708,6 +637,70 @@ static Optional linalgOpToLoopsImplSwitch(Operation *op, llvm_unreachable("Unexpected op in linalgOpToLoopsImpl"); } +SmallVector mlir::linalg::emitLoopRanges(OpBuilder &b, Location loc, + AffineMap map, + ValueRange viewSizes) { + unsigned numDims = map.getNumDims(), numRes = map.getNumResults(); + unsigned numSym = map.getNumSymbols(); + assert(viewSizes.size() == numRes + numSym && + "viewSizes must contain sizes of all views and values for symbols"); + SmallVector res(numDims); + for (unsigned idx = 0; idx < numRes; ++idx) { + auto result = map.getResult(idx); + if (auto d = result.dyn_cast()) { + if (res[d.getPosition()].offset) + continue; + res[d.getPosition()] = + Range{std_constant_index(0), viewSizes[idx], std_constant_index(1)}; + } + + // If the access pattern is of form (m, n)[s] -> (m + n - s floordiv 2), + // then the bounds are: + // (s floordiv 2) <= m <= (size(m) + s floordiv 2 - s + 1). 
+ // where size(n) is applied to the symbol s. + // This is done statically now. + if (auto binOp = result.dyn_cast()) { + auto lhs = binOp.getLHS().dyn_cast(); + auto rhs = binOp.getRHS().dyn_cast(); + if (!lhs || !rhs || binOp.getKind() != AffineExprKind::Add || + lhs.getKind() != AffineExprKind::Add || + rhs.getKind() != mlir::AffineExprKind::Mul) + continue; + + auto m = lhs.getLHS().dyn_cast(); + auto n = lhs.getRHS().dyn_cast(); + auto fDiv = rhs.getLHS().dyn_cast(); + auto minusOne = rhs.getRHS().dyn_cast(); + if (!m || !n || !fDiv || !minusOne || + fDiv.getKind() != AffineExprKind::FloorDiv || + fDiv.getLHS().getKind() != AffineExprKind::SymbolId || + fDiv.getRHS().getKind() != AffineExprKind::Constant) + continue; + + auto s = fDiv.getLHS().dyn_cast(); + if (minusOne.getValue() != -1) + continue; + + int mPos = m.getPosition(); + AffineExpr one = getAffineConstantExpr(1, s.getContext()); + AffineExpr sizeOfM = getAffineSymbolExpr(numSym, s.getContext()); + // Construction of upper bound (size(m) + s floordiv 2 - s + 1). + AffineExpr upperOffsetExpr = sizeOfM + fDiv + one - s; + AffineMap fromMap = AffineMap::get(numDims, numSym + 1, fDiv); + AffineMap toMap = AffineMap::get(numDims, numSym + 1, upperOffsetExpr); + SmallVector values(viewSizes.begin(), + viewSizes.begin() + numDims); + values.insert(values.end(), viewSizes.begin() + numRes, viewSizes.end()); + values.push_back(viewSizes[mPos]); + // Construction of the lower bound (s floordiv 2). + Value from = applyMapToValues(b, loc, fromMap, values).front(); + Value to = applyMapToValues(b, loc, toMap, values).front(); + res[mPos] = Range{from, to, std_constant_index(1)}; + } + } + return res; +} + /// Emits a loop nest with the proper body for `op`. template Optional mlir::linalg::linalgLowerOpToLoops(OpBuilder &builder, diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp index b714a1f6c6428..3282358f5f414 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp @@ -14,14 +14,119 @@ #include "PassDetail.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/IR/Function.h" #include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/BufferPlacement.h" -using namespace mlir; - namespace { + +using namespace ::mlir; +using namespace ::mlir::linalg; + +SmallVector +computeLoopRanges(Location loc, linalg::GenericOp linalgOp, OpBuilder *b) { + auto indexingMaps = llvm::to_vector<4>( + linalgOp.indexing_maps().getAsValueRange()); + auto inputIndexingMaps = + llvm::makeArrayRef(indexingMaps).take_front(linalgOp.getNumInputs()); + + mlir::edsc::ScopedContext scope(*b, loc); + return emitLoopRanges(scope.getBuilderRef(), loc, + concatAffineMaps(inputIndexingMaps), + getShape(*b, linalgOp)); +} + +Value maybeConvertToIndex(Location loc, Value val, OpBuilder *b) { + if (val.getType().isIndex()) + return val; + return b->create(loc, val, b->getIndexType()); +} + +LogicalResult allocateBuffersForResults(Location loc, + linalg::GenericOp linalgOp, + linalg::GenericOpAdaptor &adaptor, + SmallVectorImpl *resultBuffers, + OpBuilder *b) { + // Lazily compute loopRanges. + SmallVector loopRanges; + + // Allocate a buffer for every tensor result. 
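+  // Three cases are handled in turn below: results that fold onto an init
+  // tensor, statically shaped results, and dynamically shaped results whose
+  // buffer sizes are derived from the lazily computed loop ranges.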
+ for (auto en : llvm::enumerate(linalgOp.getResultTypes())) { + size_t resultIndex = en.index(); + Type resultType = en.value(); + + auto tensorType = resultType.dyn_cast(); + if (tensorType == nullptr) { + linalgOp.emitOpError() + << "tensor to buffer conversion expects ranked tensor results"; + return failure(); + } + auto tensorShape = tensorType.getShape(); + auto memrefType = MemRefType::get(tensorShape, tensorType.getElementType()); + + // Allocate buffers for init tensors that are assumed to fold onto the first + // results. + // TODO: update this assumption because the reality is more complex + // under linalg on tensor based transformations. + bool foldedInitTensor = resultIndex < linalgOp.getNumInitTensors(); + if (foldedInitTensor) { + // Dealing with an init tensor requires distinguishing between 1-use + // and many-use cases which would create aliasing and WAR hazards. + Value initTensor = linalgOp.getInitTensor(resultIndex); + Value initBuffer = adaptor.init_tensors()[resultIndex]; + if (initTensor.hasOneUse()) { + resultBuffers->push_back(initBuffer); + continue; + } + SmallVector dynOperands; + for (auto dim : llvm::enumerate(tensorShape)) { + if (dim.value() == TensorType::kDynamicSize) { + dynOperands.push_back(b->create(loc, initTensor, dim.index())); + } + } + auto alloc = b->create(loc, memrefType, dynOperands); + b->create(loc, initBuffer, alloc); + resultBuffers->push_back(alloc); + continue; + } + + // Allocate buffers for statically-shaped results. + if (memrefType.hasStaticShape()) { + resultBuffers->push_back(b->create(loc, memrefType)); + continue; + } + + // Perform a naive shape inference for the dynamically-shaped results. + // Extract the required element out of the vector. + SmallVector dynOperands; + auto resultIndexingMap = linalgOp.getOutputIndexingMap(resultIndex); + for (auto shapeElement : llvm::enumerate(tensorType.getShape())) { + if (loopRanges.empty()) + loopRanges = computeLoopRanges(loc, linalgOp, b); + + if (shapeElement.value() != ShapedType::kDynamicSize) + continue; + + AffineExpr expr = resultIndexingMap.getResult(shapeElement.index()); + switch (expr.getKind()) { + case AffineExprKind::DimId: { + int64_t loopIndex = expr.cast().getPosition(); + Value size = maybeConvertToIndex(loc, loopRanges[loopIndex].size, b); + dynOperands.push_back(size); + break; + } + default: + return failure(); + } + } + resultBuffers->push_back(b->create(loc, memrefType, dynOperands)); + } + return success(); +} + /// A pattern to convert Generic Linalg operations which work on tensors to /// use buffers. A buffer is allocated using BufferAssignmentPlacer for /// each operation result. BufferPlacement pass should be later used to move @@ -34,10 +139,10 @@ class GenericOpConverter linalg::GenericOp>::BufferAssignmentOpConversionPattern; LogicalResult - matchAndRewrite(linalg::GenericOp op, ArrayRef operands, + matchAndRewrite(linalg::GenericOp linalgOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { - linalg::GenericOpAdaptor adaptor(operands, - op.getOperation()->getAttrDictionary()); + linalg::GenericOpAdaptor adaptor( + operands, linalgOp.getOperation()->getAttrDictionary()); // All inputs need to be turned into buffers first. Until then, bail out. 
if (llvm::any_of(adaptor.inputs(), @@ -50,93 +155,54 @@ class GenericOpConverter [](Value in) { return !in.getType().isa(); })) return failure(); - Location loc = op.getLoc(); - SmallVector newOutputBuffers; - newOutputBuffers.reserve(op.getNumOutputs()); - newOutputBuffers.append(adaptor.output_buffers().begin(), - adaptor.output_buffers().end()); - - // Update all types to memref types. - // Assume the init tensors fold onto the first results. - // TODO: update this assumption because the reality is more complex under - // linalg on tensor based transformations. - for (auto en : llvm::enumerate(op.getResultTypes())) { - auto type = en.value().cast(); - if (!type.hasStaticShape()) - return rewriter.notifyMatchFailure( - op, "dynamic shapes not currently supported"); - auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); - bool foldedInitTensor = en.index() < op.getNumInitTensors(); - if (foldedInitTensor) { - // Dealing with an init tensor requires distinguishing between 1-use - // and many-use cases which would create aliasing and WAR hazards. - Value initTensor = op.getInitTensor(en.index()); - Value initBuffer = adaptor.init_tensors()[en.index()]; - if (initTensor.hasOneUse()) { - newOutputBuffers.push_back(initBuffer); - continue; - } - auto alloc = rewriter.create(loc, memrefType); - rewriter.create(loc, initBuffer, alloc); - newOutputBuffers.push_back(alloc); - } else { - auto alloc = rewriter.create(loc, memrefType); - newOutputBuffers.push_back(alloc); - } + Location loc = linalgOp.getLoc(); + SmallVector newOutputBuffers(adaptor.output_buffers().begin(), + adaptor.output_buffers().end()); + + if (failed(allocateBuffersForResults(loc, linalgOp, adaptor, + &newOutputBuffers, &rewriter))) { + linalgOp.emitOpError() + << "Failed to allocate buffers for tensor results."; + return failure(); } // Generate a new linalg operation that works on buffers. - auto linalgOp = rewriter.create( + auto newLinalgOp = rewriter.create( loc, - /*resultTensorTypes=*/ArrayRef{}, + /*resultTensorTypes=*/llvm::None, /*inputs=*/adaptor.inputs(), /*outputBuffers=*/newOutputBuffers, - /*initTensors=*/ValueRange{}, op.indexing_maps(), op.iterator_types(), - op.docAttr(), op.library_callAttr(), op.symbol_sourceAttr()); + /*initTensors=*/llvm::None, linalgOp.indexing_maps(), + linalgOp.iterator_types(), linalgOp.docAttr(), + linalgOp.library_callAttr(), linalgOp.symbol_sourceAttr()); // Create a new block in the region of the new Generic Op. - Block &oldBlock = op.getRegion().front(); - Region &newRegion = linalgOp.region(); + Block *oldBlock = linalgOp.getBody(); + Region &newRegion = newLinalgOp.region(); Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(), - oldBlock.getArgumentTypes()); - - // Add the result arguments that do not come from init_tensors to the new - // block. - // TODO: update this assumption because the reality is more complex under - // linalg on tensor based transformations. - for (Value v : - ValueRange(newOutputBuffers).drop_front(adaptor.init_tensors().size())) + oldBlock->getArgumentTypes()); + + // Add the result arguments to the new block. + for (Value v : newOutputBuffers) newBlock->addArgument(v.getType().cast().getElementType()); // Clone the body of the old block to the new block. 
BlockAndValueMapping mapping; - for (unsigned i = 0; i < oldBlock.getNumArguments(); i++) - mapping.map(oldBlock.getArgument(i), newBlock->getArgument(i)); + mapping.map(oldBlock->getArguments(), newBlock->getArguments()); OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointToEnd(newBlock); - for (auto &op : oldBlock.getOperations()) { + for (auto &op : oldBlock->getOperations()) { Operation *clonedOp = rewriter.clone(op, mapping); mapping.map(op.getResults(), clonedOp->getResults()); } // Replace the results of the old op with the new output buffers. - rewriter.replaceOp(op, newOutputBuffers); + rewriter.replaceOp(linalgOp, newOutputBuffers); return success(); } }; -/// Populate the given list with patterns to convert Linalg operations on -/// tensors to buffers. -static void populateConvertLinalgOnTensorsToBuffersPattern( - MLIRContext *context, BufferAssignmentTypeConverter *converter, - OwningRewritePatternList *patterns) { - populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, converter, - patterns); - patterns->insert(context, converter); -} - /// Converts Linalg operations that work on tensor-type operands or results to /// work on buffers. struct ConvertLinalgOnTensorsToBuffers @@ -176,8 +242,11 @@ struct ConvertLinalgOnTensorsToBuffers BufferAssignmentTypeConverter::AppendToArgumentsList); OwningRewritePatternList patterns; - populateConvertLinalgOnTensorsToBuffersPattern(&context, &converter, - &patterns); + populateConvertLinalgOnTensorsToBuffersPatterns(&context, &converter, + &patterns); + populateWithBufferAssignmentOpConversionPatterns< + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(&context, &converter, + &patterns); if (failed(applyFullConversion(this->getOperation(), target, patterns))) this->signalPassFailure(); } @@ -188,3 +257,9 @@ std::unique_ptr> mlir::createConvertLinalgOnTensorsToBuffersPass() { return std::make_unique(); } + +void mlir::linalg::populateConvertLinalgOnTensorsToBuffersPatterns( + MLIRContext *context, BufferAssignmentTypeConverter *converter, + OwningRewritePatternList *patterns) { + patterns->insert(context, converter); +} diff --git a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir index 654a13fca743f..4339b33a23792 100644 --- a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir +++ b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir @@ -2,11 +2,13 @@ #map0 = affine_map<(d0) -> (d0)> -// CHECK-LABEL: func @multiple_results_generic_op -func @multiple_results_generic_op(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { - %0, %1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} - ins(%arg0 : tensor<4xf32>) { - ^bb0(%gen_arg1: f32): +// CHECK-LABEL: func @multiple_results +func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { + %0, %1 = linalg.generic { + indexing_maps = [#map0, #map0, #map0], + iterator_types = ["parallel"] + } ins(%arg0 : tensor<4xf32>) { + ^bb0(%gen_arg1: f32): %tmp1 = exp %gen_arg1 : f32 linalg.yield %tmp1, %tmp1 : f32, f32 } -> tensor<4xf32>, tensor<4xf32> @@ -34,15 +36,20 @@ func @multiple_results_generic_op(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tenso // CHECK-LABEL: func @chained_operations func @chained_operations(%arg0: tensor<4xf32>) -> tensor<4xf32> { - %0 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0 : tensor<4xf32>) { - ^bb0(%gen_arg1: f32): + %0 = linalg.generic { + 
indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]
+  } ins(%arg0 : tensor<4xf32>) {
+  ^bb0(%gen_arg1: f32):
     %tmp1 = exp %gen_arg1 : f32
     linalg.yield %tmp1 : f32
   } -> tensor<4xf32>
-  %1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
-    ins(%0 : tensor<4xf32>) {
-    ^bb0(%gen_arg2: f32):
+
+  %1 = linalg.generic {
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]
+  } ins(%0 : tensor<4xf32>) {
+  ^bb0(%gen_arg2: f32):
     %tmp2 = exp %gen_arg2 : f32
     linalg.yield %tmp2 : f32
   } -> tensor<4xf32>
@@ -73,6 +80,46 @@ func @no_linalg_op(%arg0: f32) -> (f32, f32) {
   %0 = mulf %arg0, %arg0 : f32
   return %0, %0 : f32, f32
 }
-// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]]) -> ([[TYPE]], [[TYPE]])
-// CHECK: %[[RESULT:.*]] = mulf %[[NEW_ARG0]], %[[NEW_ARG0]] : [[TYPE]]
-// CHECK: return %[[RESULT]], %[[RESULT]] : [[TYPE]], [[TYPE]]
+// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]]) -> ([[TYPE]], [[TYPE]])
+// CHECK: %[[RESULT:.*]] = mulf %[[NEW_ARG0]], %[[NEW_ARG0]] : [[TYPE]]
+// CHECK: return %[[RESULT]], %[[RESULT]] : [[TYPE]], [[TYPE]]
+
+// -----
+
+#map_2d = affine_map<(d0, d1) -> (d0, d1)>
+#map_2d_inv = affine_map<(d0, d1) -> (d1, d0)>
+
+func @dynamic_results(%arg0: tensor<?x?xf32>)
+         -> (tensor<?x?xf32>, tensor<?x?xf32>) {
+  %0, %1 = linalg.generic {
+    indexing_maps = [#map_2d, #map_2d, #map_2d_inv],
+    iterator_types = ["parallel", "parallel"]
+  } ins(%arg0 : tensor<?x?xf32>) {
+  ^bb0(%gen_arg1: f32):
+    %tmp1 = exp %gen_arg1 : f32
+    linalg.yield %tmp1, %tmp1 : f32, f32
+  } -> tensor<?x?xf32>, tensor<?x?xf32>
+  return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
+}
+
+// CHECK: #map0 = affine_map<(d0, d1) -> (d0, d1)>
+// CHECK: #map1 = affine_map<(d0, d1) -> (d1, d0)>
+
+// CHECK-LABEL: func @dynamic_results
+// CHECK-SAME: (%[[INPUT:.*]]: [[TYPE:.*]], %[[OUT_1:.*]]: [[TYPE]], %[[OUT_2:.*]]: [[TYPE]]) {
+// CHECK: %[[C0:.*]] = constant 0 : index
+// CHECK: %[[DIM_0:.*]] = dim %[[INPUT]], %[[C0]] : [[TYPE]]
+// CHECK: %[[C1:.*]] = constant 1 : index
+// CHECK: %[[DIM_1:.*]] = dim %[[INPUT]], %[[C1]] : [[TYPE]]
+// CHECK: %[[OUT_BUF_1:.*]] = alloc(%[[DIM_0]], %[[DIM_1]]) : [[TYPE]]
+// CHECK: %[[OUT_BUF_2:.*]] = alloc(%[[DIM_1]], %[[DIM_0]]) : [[TYPE]]
+
+// CHECK: linalg.generic {indexing_maps = [#map0, #map0, #map1], {{.*}}}
+// CHECK-SAME: ins(%[[INPUT]] : [[TYPE]])
+// CHECK-SAME: outs(%[[OUT_BUF_1]], %[[OUT_BUF_2]] : [[TYPE]], [[TYPE]]) {
+
+// CHECK: linalg.copy(%[[OUT_BUF_1]], %[[OUT_1]]) : [[TYPE]], [[TYPE]]
+// CHECK: dealloc %[[OUT_BUF_1]] : [[TYPE]]
+// CHECK: linalg.copy(%[[OUT_BUF_2]], %[[OUT_2]]) : [[TYPE]], [[TYPE]]
+// CHECK: dealloc %[[OUT_BUF_2]] : [[TYPE]]
+// CHECK: return

From e1d4ca00094a4267b6fd70f613f2e0390b2a8cbf Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 8 Oct 2020 11:04:54 +0100
Subject: [PATCH 308/321] [InstCombine] matchRotate - add support for matching
 general funnel shifts with constant shift amounts (PR46896)

First step towards extending the existing rotation support to full funnel
shift handling now that the backend legalization support has improved.

This enables us to match the shift-by-constant cases, which are pretty
trivial to expand again if necessary.

D88420 will add non-uniform support for funnel shifts as well once it's
been finalized.
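As an illustration (a minimal sketch distilled from the funnel.ll diff
below; the function names @src and @tgt are illustrative, not part of the
patch), a pair of opposite shifts of two different values whose constant
shift amounts sum to the bitwidth now folds to a funnel shift intrinsic:

  define i32 @src(i32 %x, i32 %y) {
    %shl = shl i32 %x, 11
    %shr = lshr i32 %y, 21
    %r = or i32 %shl, %shr
    ret i32 %r
  }

  ; ...is canonicalized by instcombine to...

  define i32 @tgt(i32 %x, i32 %y) {
    %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 11)
    ret i32 %r
  }

When the two shifted values are the same this degenerates to the rotate
case that was already supported; the two-value form is matched only for
constant shift amounts, since the variable-amount patterns still require a
genuine rotate.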
Differential Revision: https://reviews.llvm.org/D88834 --- .../InstCombine/InstCombineAndOrXor.cpp | 33 +++++++++------- llvm/test/Transforms/InstCombine/funnel.ll | 38 ++++++------------- 2 files changed, 31 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index c13362fa3136c..0cc50cbe36b85 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2052,22 +2052,22 @@ Instruction *InstCombinerImpl::matchBSwap(BinaryOperator &Or) { return LastInst; } -/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic. -static Instruction *matchRotate(Instruction &Or) { +/// Match UB-safe variants of the funnel shift intrinsic. +static Instruction *matchFunnelShift(Instruction &Or) { // TODO: Can we reduce the code duplication between this and the related // rotate matching code under visitSelect and visitTrunc? unsigned Width = Or.getType()->getScalarSizeInBits(); - // First, find an or'd pair of opposite shifts with the same shifted operand: - // or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1) + // First, find an or'd pair of opposite shifts: + // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1) BinaryOperator *Or0, *Or1; if (!match(Or.getOperand(0), m_BinOp(Or0)) || !match(Or.getOperand(1), m_BinOp(Or1))) return nullptr; - Value *ShVal, *ShAmt0, *ShAmt1; - if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) || - !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1))))) + Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1; + if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) || + !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1))))) return nullptr; BinaryOperator::BinaryOps ShiftOpcode0 = Or0->getOpcode(); @@ -2075,9 +2075,9 @@ static Instruction *matchRotate(Instruction &Or) { if (ShiftOpcode0 == ShiftOpcode1) return nullptr; - // Match the shift amount operands for a rotate pattern. This always matches - // a subtraction on the R operand. - auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * { + // Match the shift amount operands for a funnel shift pattern. This always + // matches a subtraction on the R operand. + auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * { // Check for constant shift amounts that sum to the bitwidth. // TODO: Support non-uniform shift amounts. const APInt *LC, *RC; @@ -2085,6 +2085,12 @@ static Instruction *matchRotate(Instruction &Or) { if (LC->ult(Width) && RC->ult(Width) && (*LC + *RC) == Width) return ConstantInt::get(L->getType(), *LC); + // For non-constant cases, the following patterns currently only work for + // rotation patterns. + // TODO: Add general funnel-shift compatible patterns. + if (ShVal0 != ShVal1) + return nullptr; + // For non-constant cases we don't support non-pow2 shift masks. // TODO: Is it worth matching urem as well? if (!isPowerOf2_32(Width)) @@ -2121,7 +2127,8 @@ static Instruction *matchRotate(Instruction &Or) { (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl); Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr; Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType()); - return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt }); + return IntrinsicInst::Create( + F, {IsFshl ? ShVal0 : ShVal1, IsFshl ? 
ShVal1 : ShVal0, ShAmt}); } /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns. @@ -2574,8 +2581,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Instruction *BSwap = matchBSwap(I)) return BSwap; - if (Instruction *Rotate = matchRotate(I)) - return Rotate; + if (Instruction *Funnel = matchFunnelShift(I)) + return Funnel; if (Instruction *Concat = matchOrConcat(I, Builder)) return replaceInstUsesWith(I, Concat); diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll index fca73a4ffb884..4dfcbcde4fe6b 100644 --- a/llvm/test/Transforms/InstCombine/funnel.ll +++ b/llvm/test/Transforms/InstCombine/funnel.ll @@ -3,16 +3,14 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -; TODO: Canonicalize or(shl,lshr) by constant to funnel shift intrinsics. +; Canonicalize or(shl,lshr) by constant to funnel shift intrinsics. ; This should help cost modeling for vectorization, inlining, etc. ; If a target does not have a fshl instruction, the expansion will ; be exactly these same 3 basic ops (shl/lshr/or). define i32 @fshl_i32_constant(i32 %x, i32 %y) { ; CHECK-LABEL: @fshl_i32_constant( -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 11 -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[Y:%.*]], 21 -; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 11) ; CHECK-NEXT: ret i32 [[R]] ; %shl = shl i32 %x, 11 @@ -23,9 +21,7 @@ define i32 @fshl_i32_constant(i32 %x, i32 %y) { define i42 @fshr_i42_constant(i42 %x, i42 %y) { ; CHECK-LABEL: @fshr_i42_constant( -; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[X:%.*]], 31 -; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[Y:%.*]], 11 -; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]] +; CHECK-NEXT: [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[Y:%.*]], i42 [[X:%.*]], i42 11) ; CHECK-NEXT: ret i42 [[R]] ; %shr = lshr i42 %x, 31 @@ -34,13 +30,11 @@ define i42 @fshr_i42_constant(i42 %x, i42 %y) { ret i42 %r } -; TODO: Vector types are allowed. +; Vector types are allowed. 
define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -51,9 +45,7 @@ define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) { define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat_undef0( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -64,9 +56,7 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) { define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat_undef1( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -75,13 +65,11 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) { ret <2 x i16> %r } -; TODO: Non-power-of-2 vector types are allowed. +; Non-power-of-2 vector types are allowed. 
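+; (The constant-amount fold only requires the two shift amounts to sum to
+; the bitwidth, so odd widths such as i17 are fine; the power-of-2 width
+; restriction in matchFunnelShift applies only to the variable-amount
+; rotate patterns.)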
define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) {
 ; CHECK-LABEL: @fshr_v2i17_constant_splat(
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]],
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]],
-; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> )
 ; CHECK-NEXT: ret <2 x i17> [[R]]
 ;
   %shr = lshr <2 x i17> %x,
@@ -92,9 +80,7 @@ define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) {

 define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) {
 ; CHECK-LABEL: @fshr_v2i17_constant_splat_undef0(
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]],
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]],
-; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> )
 ; CHECK-NEXT: ret <2 x i17> [[R]]
 ;
   %shr = lshr <2 x i17> %x,
@@ -105,9 +91,7 @@ define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) {

 define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) {
 ; CHECK-LABEL: @fshr_v2i17_constant_splat_undef1(
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]],
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]],
-; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> )
 ; CHECK-NEXT: ret <2 x i17> [[R]]
 ;
   %shr = lshr <2 x i17> %x,

From 7d01bb8f56f5f1d8e180e1b8d47c00b45aa0555b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 8 Oct 2020 11:43:34 +0100
Subject: [PATCH 309/321] [llvm-objdump] Ensure we consistently use the
 llvm::stable_sort wrappers.

We use this everywhere else in this file; these were just missed.
---
 llvm/tools/llvm-objdump/llvm-objdump.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 46ed7414dbb31..d57ea8ef94e78 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1737,8 +1737,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
   // the output.
   StringSet<> FoundDisasmSymbolSet;
   for (std::pair &SecSyms : AllSymbols)
-    stable_sort(SecSyms.second);
-  stable_sort(AbsoluteSymbols);
+    llvm::stable_sort(SecSyms.second);
+  llvm::stable_sort(AbsoluteSymbols);

   std::unique_ptr DICtx;
   LiveVariablePrinter LVP(*Ctx.getRegisterInfo(), *STI);

From 38348fa26548fdcdbc1ae18c6e612d67f103ee37 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Tue, 22 Sep 2020 11:42:37 +0100
Subject: [PATCH 310/321] AArch64: treat MC expressions as 2s complement
 arithmetic.

We had a couple of over-zealous diagnostics that meant IR with a
reasonable and valid interpretation was rejected.
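To make the wrap-around concrete: in the test added below, the constant
part of @var's initializer folds to 646102975 - 3432360802 = -2786257827,
which fits in 32 bits only as its 2s complement wrap 1508709469
(0x59ed145d), the word the test expects in the object file (the
@var - @other term is, on my reading of the test, left to a subtractor
relocation pair to resolve). Previously the out-of-range intermediate
value tripped the now-removed "fixup value too large for data type!"
diagnostic.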
--- .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 16 ---------------- llvm/test/CodeGen/AArch64/2s-complement-asm.ll | 9 +++++++++ 2 files changed, 9 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/2s-complement-asm.ll diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index dc44980ce218c..a355cbf30d315 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -154,19 +154,6 @@ static unsigned AdrImmBits(unsigned Value) { return (hi19 << 5) | (lo2 << 29); } -static bool valueFitsIntoFixupKind(unsigned Kind, uint64_t Value) { - unsigned NumBits; - switch(Kind) { - case FK_Data_1: NumBits = 8; break; - case FK_Data_2: NumBits = 16; break; - case FK_Data_4: NumBits = 32; break; - case FK_Data_8: NumBits = 64; break; - default: return true; - } - return isUIntN(NumBits, Value) || - isIntN(NumBits, static_cast(Value)); -} - static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, uint64_t Value, MCContext &Ctx, const Triple &TheTriple, bool IsResolved) { @@ -341,9 +328,6 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, case FK_Data_2: case FK_Data_4: case FK_Data_8: - if (!valueFitsIntoFixupKind(Fixup.getTargetKind(), Value)) - Ctx.reportError(Fixup.getLoc(), "fixup value too large for data type!"); - LLVM_FALLTHROUGH; case FK_SecRel_2: case FK_SecRel_4: return Value; diff --git a/llvm/test/CodeGen/AArch64/2s-complement-asm.ll b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll new file mode 100644 index 0000000000000..cf646d1360204 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=arm64-apple-ios %s -filetype=obj -o - | llvm-objdump --macho --section __DATA,__data - | FileCheck %s + +; CHECK: Contents of (__DATA,__data) section +; CHECK: 0000002a 59ed145d +@other = global i32 42 +@var = global i32 sub(i32 646102975, + i32 add (i32 trunc(i64 sub(i64 ptrtoint(i32* @var to i64), + i64 ptrtoint(i32* @other to i64)) to i32), + i32 3432360802)) From 1e75668821f7c2abfdb4a25af76239b9120ae0ca Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Thu, 8 Oct 2020 13:34:52 +0300 Subject: [PATCH 311/321] [AMDGPU][MC][GFX1030] Disabled v_mac_f32 See bug 47741 Reviewers: nhaehnle, rampitec Differential Revision: https://reviews.llvm.org/D89000 --- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 7 +++++-- llvm/test/MC/AMDGPU/gfx1030_err.s | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 3451c23891811..4c263de673d67 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -92,6 +92,7 @@ class VOP2_Real : // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; + let OtherPredicates = ps.OtherPredicates; let AsmMatchConverter = ps.AsmMatchConverter; let AsmVariantName = ps.AsmVariantName; let Constraints = ps.Constraints; @@ -494,14 +495,14 @@ defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN, or>; defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN, xor>; let mayRaiseFPException = 0 in { -let SubtargetPredicate = HasMadMacF32Insts in { +let OtherPredicates = [HasMadMacF32Insts] in { let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1 in { defm V_MAC_F32 : VOP2Inst <"v_mac_f32", 
VOP_MAC_F32>;
 }

 def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;

-} // End SubtargetPredicate = HasMadMacF32Insts
+} // End OtherPredicates = [HasMadMacF32Insts]
 }

 // No patterns so that the scalar instructions are always selected.
@@ -873,6 +874,7 @@ class Base_VOP2_DPP16 op, VOP2_DPP_Pseudo ps,
                       VOP2_DPP {
   let AssemblerPredicate = HasDPP16;
   let SubtargetPredicate = HasDPP16;
+  let OtherPredicates = ps.OtherPredicates;
 }

 class VOP2_DPP16 op, VOP2_DPP_Pseudo ps,
@@ -899,6 +901,7 @@ class VOP2_DPP8 op, VOP2_Pseudo ps,
   let AssemblerPredicate = HasDPP8;
   let SubtargetPredicate = HasDPP8;
+  let OtherPredicates = ps.OtherPredicates;
 }

 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s
index 246548f166839..c6af1736371ae 100644
--- a/llvm/test/MC/AMDGPU/gfx1030_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1030_err.s
@@ -26,7 +26,7 @@ s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK)
 // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: specified hardware register is not supported on this GPU

 v_mac_f32 v0, v1, v2
-// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

 v_mad_f32 v0, v1, v2, v3
 // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

From 06758c6a6135f59deec8e73d4fcb69946ab47f54 Mon Sep 17 00:00:00 2001
From: Markus Lavin
Date: Thu, 8 Oct 2020 12:49:51 +0200
Subject: [PATCH 312/321] [DebugInfo] Improve dbg preservation in LSR.

Use SCEV to salvage additional @llvm.dbg.value instructions that have
turned into referencing undef after transformation (and after traditional
salvageDebugInfo).

Before the transformation, compute the SCEV for each @llvm.dbg.value in
the loop body and store it (alongside its current DIExpression). After
the transformation, update those @llvm.dbg.value now referencing undef by
comparing their stored SCEVs to the SCEVs of the current loop-header
PHI-nodes. Allow a match with a constant offset by inserting compensation
code in the DIExpression.

Includes a fix for the nullptr deref that caused the original commit to
be reverted in 9d63029770.

Fixes: PR38815

Differential Revision: https://reviews.llvm.org/D87494
---
 llvm/include/llvm/Analysis/ScalarEvolution.h  | 18 ++---
 .../Transforms/Scalar/LoopStrengthReduce.cpp  | 57 ++++++++++++++
 llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll   |  2 +-
 .../LoopStrengthReduce/dbg-preserve-0.ll      | 74 +++++++++++++++++++
 4 files changed, 141 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 158257a5aa9a1..ac6090a30d2ff 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1117,6 +1117,15 @@ class ScalarEvolution {
                                       const SCEV *S, const Loop *L,
                                       SmallPtrSetImpl &Preds);

+  /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a
+  /// constant, and None if it isn't.
+  ///
+  /// This is intended to be a cheaper version of getMinusSCEV. We can be
+  /// frugal here since we just bail out of actually constructing and
+  /// canonicalizing an expression in the cases where the result isn't going
+  /// to be a constant.
+  Optional computeConstantDifference(const SCEV *LHS, const SCEV *RHS);
+
 private:
   /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
   /// Value is deleted.
@@ -1799,15 +1808,6 @@ class ScalarEvolution { bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, SCEV::NoWrapFlags &Flags); - /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a - /// constant, and None if it isn't. - /// - /// This is intended to be a cheaper version of getMinusSCEV. We can be - /// frugal here since we just bail out of actually constructing and - /// canonicalizing an expression in the cases where the result isn't going - /// to be a constant. - Optional computeConstantDifference(const SCEV *LHS, const SCEV *RHS); - /// Drop memoized information computed for S. void forgetMemoizedResults(const SCEV *S); diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 537838e2bdc19..93b9917b5972b 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -59,6 +59,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -80,6 +81,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" @@ -5776,6 +5778,27 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, if (MSSA) MSSAU = std::make_unique(MSSA); + // Debug preservation - record all llvm.dbg.value from the loop as well as + // the SCEV of their variable location. Since salvageDebugInfo may change the + // DIExpression we need to store the original here as well (i.e. it needs to + // be in sync with the SCEV). + SmallVector< + std::tuple, + 32> + DbgValues; + for (auto &B : L->getBlocks()) { + for (auto &I : *B) { + if (DbgValueInst *D = dyn_cast(&I)) { + auto V = D->getVariableLocation(); + if (!V || !SE.isSCEVable(V->getType())) + continue; + auto DS = SE.getSCEV(V); + DbgValues.push_back( + std::make_tuple(D, V->getType(), DS, D->getExpression())); + } + } + } + // Run the main LSR transformation. Changed |= LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged(); @@ -5797,6 +5820,40 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); } } + // Debug preservation - go through all recorded llvm.dbg.value and for those + // that now have an undef variable location use the recorded SCEV to try and + // update it. Compare with SCEV of Phi-nodes of loop header to find a + // suitable update candidate. SCEV match with constant offset is allowed and + // will be compensated for in the DIExpression. 
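+  // For example (mirroring the dbg-preserve-0.ll test added below): a
+  // location that went undef with stored SCEV {%p,+,3} matched against an
+  // LSR-created PHI %lsr.iv with SCEV {(3 + %p),+,3} yields a constant
+  // difference of -3, so the llvm.dbg.value is pointed at %lsr.iv and
+  // DW_OP_constu 3, DW_OP_minus is prepended to its DIExpression.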
+ if (Changed) { + for (auto &D : DbgValues) { + auto DbgValue = std::get(D); + auto DbgValueType = std::get(D); + auto DbgValueSCEV = std::get(D); + auto DbgDIExpr = std::get(D); + if (!isa(DbgValue->getVariableLocation())) + continue; + for (PHINode &Phi : L->getHeader()->phis()) { + if (DbgValueType != Phi.getType()) + continue; + if (!SE.isSCEVable(Phi.getType())) + continue; + auto PhiSCEV = SE.getSCEV(&Phi); + if (Optional Offset = + SE.computeConstantDifference(DbgValueSCEV, PhiSCEV)) { + auto &Ctx = DbgValue->getContext(); + DbgValue->setOperand( + 0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(&Phi))); + if (Offset.getValue().getSExtValue()) { + SmallVector Ops; + DIExpression::appendOffset(Ops, Offset.getValue().getSExtValue()); + DbgDIExpr = DIExpression::prependOpcodes(DbgDIExpr, Ops, true); + } + DbgValue->setOperand(2, MetadataAsValue::get(Ctx, DbgDIExpr)); + } + } + } + } return Changed; } diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll index 08aecdac5b794..e8f37a370666c 100644 --- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll +++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll @@ -33,7 +33,7 @@ ; ASM: popl %ebx ; ASM: [[EPILOGUE]]: # %return ; ASM: retl $8 -; ASM: Ltmp10: +; ASM: Ltmp11: ; ASM: .cv_fpo_endproc ; Note how RvaStart advances 7 bytes to skip the shrink-wrapped portion. diff --git a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll new file mode 100644 index 0000000000000..71031aabb95b7 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +; Test that LSR preserves debug-info for induction variables. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define dso_local void @foo(i8* nocapture %p) local_unnamed_addr !dbg !7 { +; CHECK-LABEL: @foo( +entry: + call void @llvm.dbg.value(metadata i8* %p, metadata !13, metadata !DIExpression()), !dbg !16 + call void @llvm.dbg.value(metadata i8 0, metadata !14, metadata !DIExpression()), !dbg !17 + br label %for.body, !dbg !18 + +for.cond.cleanup: ; preds = %for.body + ret void, !dbg !19 + +for.body: ; preds = %entry, %for.body +; CHECK-LABEL: for.body: + %i.06 = phi i8 [ 0, %entry ], [ %inc, %for.body ] + %p.addr.05 = phi i8* [ %p, %entry ], [ %add.ptr, %for.body ] + call void @llvm.dbg.value(metadata i8 %i.06, metadata !14, metadata !DIExpression()), !dbg !17 + call void @llvm.dbg.value(metadata i8* %p.addr.05, metadata !13, metadata !DIExpression()), !dbg !16 +; CHECK-NOT: call void @llvm.dbg.value(metadata i8* undef +; CHECK: call void @llvm.dbg.value(metadata i8* %lsr.iv, metadata ![[MID_p:[0-9]+]], metadata !DIExpression(DW_OP_constu, 3, DW_OP_minus, DW_OP_stack_value)), !dbg !16 + %add.ptr = getelementptr inbounds i8, i8* %p.addr.05, i64 3, !dbg !20 + call void @llvm.dbg.value(metadata i8* %add.ptr, metadata !13, metadata !DIExpression()), !dbg !16 +; CHECK-NOT: call void @llvm.dbg.value(metadata i8* undef +; CHECK: call void @llvm.dbg.value(metadata i8* %lsr.iv, metadata ![[MID_p]], metadata !DIExpression()), !dbg !16 + store i8 %i.06, i8* %add.ptr, align 1, !dbg !23, !tbaa !24 + %inc = add nuw nsw i8 %i.06, 1, !dbg !27 + call void @llvm.dbg.value(metadata i8 %inc, metadata !14, metadata !DIExpression()), !dbg !17 + %exitcond.not = icmp eq i8 %inc, 32, !dbg !28 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !18, !llvm.loop !29 +} + 
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "lsrdbg.c", directory: "/")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 12.0.0"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char)
+!12 = !{!13, !14}
+!13 = !DILocalVariable(name: "p", arg: 1, scope: !7, file: !1, line: 2, type: !10)
+; CHECK: ![[MID_p]] = !DILocalVariable(name: "p", arg: 1, scope: !7, file: !1, line: 2, type: !10)
+!14 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 4, type: !11)
+!15 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 3)
+!16 = !DILocation(line: 0, scope: !7)
+!17 = !DILocation(line: 0, scope: !15)
+!18 = !DILocation(line: 4, column: 3, scope: !15)
+!19 = !DILocation(line: 8, column: 1, scope: !7)
+!20 = !DILocation(line: 5, column: 7, scope: !21)
+!21 = distinct !DILexicalBlock(scope: !22, file: !1, line: 4, column: 42)
+!22 = distinct !DILexicalBlock(scope: !15, file: !1, line: 4, column: 3)
+!23 = !DILocation(line: 6, column: 8, scope: !21)
+!24 = !{!25, !25, i64 0}
+!25 = !{!"omnipotent char", !26, i64 0}
+!26 = !{!"Simple C/C++ TBAA"}
+!27 = !DILocation(line: 4, column: 38, scope: !22)
+!28 = !DILocation(line: 4, column: 31, scope: !22)
+!29 = distinct !{!29, !18, !30, !31}
+!30 = !DILocation(line: 7, column: 3, scope: !15)
+!31 = !{!"llvm.loop.unroll.disable"}

From 70bf35070a74df02ed12b9988ce58d50e63a7c04 Mon Sep 17 00:00:00 2001
From: Serge Pavlov
Date: Fri, 24 Apr 2020 00:27:14 +0700
Subject: [PATCH 313/321] [Driver] Add output file to properties of Command

An object of class `Command` contains various properties of a command to
execute, but the output file was missing from them. This change adds that
property. It is required for the reporting of consumed time and memory
implemented in D78903 and may be used in other cases too.
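A sketch of the intended use (hypothetical tool code, assuming the usual
ConstructJob surroundings; the real call sites updated below follow the
first pattern):

  // Forward the job's Output so Command::OutputFilenames is populated.
  C.addCommand(std::make_unique<Command>(
      JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, Inputs, Output));

  // A consumer, e.g. per-job resource reporting, can then ask what a
  // command writes:
  for (const auto &Cmd : C.getJobs())
    for (const std::string &File : Cmd.getOutputFilenames())
      llvm::errs() << Cmd.getExecutable() << " -> " << File << "\n";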
Differential Revision: https://reviews.llvm.org/D78902 --- clang/include/clang/Driver/Job.h | 22 ++++++++++--- clang/lib/Driver/Job.cpp | 18 +++++++---- clang/lib/Driver/ToolChains/AIX.cpp | 4 +-- clang/lib/Driver/ToolChains/AMDGPU.cpp | 6 ++-- clang/lib/Driver/ToolChains/AVR.cpp | 6 ++-- clang/lib/Driver/ToolChains/Ananas.cpp | 10 +++--- clang/lib/Driver/ToolChains/BareMetal.cpp | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 32 +++++++++++-------- clang/lib/Driver/ToolChains/CloudABI.cpp | 5 +-- clang/lib/Driver/ToolChains/CommonArgs.cpp | 7 ++-- clang/lib/Driver/ToolChains/CrossWindows.cpp | 7 ++-- clang/lib/Driver/ToolChains/Cuda.cpp | 6 ++-- clang/lib/Driver/ToolChains/Darwin.cpp | 12 +++---- clang/lib/Driver/ToolChains/DragonFly.cpp | 10 +++--- clang/lib/Driver/ToolChains/Flang.cpp | 5 +-- clang/lib/Driver/ToolChains/FreeBSD.cpp | 10 +++--- clang/lib/Driver/ToolChains/Fuchsia.cpp | 2 +- clang/lib/Driver/ToolChains/Gnu.cpp | 20 +++++++----- clang/lib/Driver/ToolChains/HIP.cpp | 14 ++++---- clang/lib/Driver/ToolChains/Hexagon.cpp | 10 +++--- .../lib/Driver/ToolChains/InterfaceStubs.cpp | 2 +- clang/lib/Driver/ToolChains/MSP430.cpp | 6 ++-- clang/lib/Driver/ToolChains/MSVC.cpp | 12 +++---- clang/lib/Driver/ToolChains/MinGW.cpp | 12 ++++--- clang/lib/Driver/ToolChains/Minix.cpp | 10 +++--- clang/lib/Driver/ToolChains/Myriad.cpp | 10 +++--- clang/lib/Driver/ToolChains/NaCl.cpp | 5 +-- clang/lib/Driver/ToolChains/NetBSD.cpp | 10 +++--- clang/lib/Driver/ToolChains/OpenBSD.cpp | 10 +++--- clang/lib/Driver/ToolChains/PS4CPU.cpp | 10 +++--- .../lib/Driver/ToolChains/RISCVToolchain.cpp | 6 ++-- clang/lib/Driver/ToolChains/Solaris.cpp | 4 +-- clang/lib/Driver/ToolChains/WebAssembly.cpp | 7 ++-- clang/lib/Driver/ToolChains/XCore.cpp | 4 +-- clang/unittests/Driver/ToolChainTest.cpp | 30 +++++++++++++++++ 35 files changed, 214 insertions(+), 132 deletions(-) diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h index 6173b9d314b4d..8a348c8048d06 100644 --- a/clang/include/clang/Driver/Job.h +++ b/clang/include/clang/Driver/Job.h @@ -122,6 +122,9 @@ class Command { /// The list of program arguments which are inputs. llvm::opt::ArgStringList InputFilenames; + /// The list of program arguments which are outputs. May be empty. + std::vector OutputFilenames; + /// Response file name, if this command is set to use one, or nullptr /// otherwise const char *ResponseFile = nullptr; @@ -158,8 +161,8 @@ class Command { Command(const Action &Source, const Tool &Creator, ResponseFileSupport ResponseSupport, const char *Executable, - const llvm::opt::ArgStringList &Arguments, - ArrayRef Inputs); + const llvm::opt::ArgStringList &Arguments, ArrayRef Inputs, + ArrayRef Outputs = None); // FIXME: This really shouldn't be copyable, but is currently copied in some // error handling in Driver::generateCompilationDiagnostics. 
Command(const Command &) = default; @@ -201,6 +204,14 @@ class Command { const llvm::opt::ArgStringList &getArguments() const { return Arguments; } + const llvm::opt::ArgStringList &getInputFilenames() const { + return InputFilenames; + } + + const std::vector &getOutputFilenames() const { + return OutputFilenames; + } + protected: /// Optionally print the filenames to be compiled void PrintFileNames() const; @@ -212,7 +223,7 @@ class CC1Command : public Command { CC1Command(const Action &Source, const Tool &Creator, ResponseFileSupport ResponseSupport, const char *Executable, const llvm::opt::ArgStringList &Arguments, - ArrayRef Inputs); + ArrayRef Inputs, ArrayRef Outputs = None); void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo = nullptr) const override; @@ -230,7 +241,7 @@ class FallbackCommand : public Command { FallbackCommand(const Action &Source_, const Tool &Creator_, ResponseFileSupport ResponseSupport, const char *Executable_, const llvm::opt::ArgStringList &Arguments_, - ArrayRef Inputs, + ArrayRef Inputs, ArrayRef Outputs, std::unique_ptr Fallback_); void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote, @@ -250,7 +261,8 @@ class ForceSuccessCommand : public Command { ResponseFileSupport ResponseSupport, const char *Executable_, const llvm::opt::ArgStringList &Arguments_, - ArrayRef Inputs); + ArrayRef Inputs, + ArrayRef Outputs = None); void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo = nullptr) const override; diff --git a/clang/lib/Driver/Job.cpp b/clang/lib/Driver/Job.cpp index 4808a9f4628d5..de2c2350f8d1d 100644 --- a/clang/lib/Driver/Job.cpp +++ b/clang/lib/Driver/Job.cpp @@ -38,12 +38,15 @@ using namespace driver; Command::Command(const Action &Source, const Tool &Creator, ResponseFileSupport ResponseSupport, const char *Executable, const llvm::opt::ArgStringList &Arguments, - ArrayRef Inputs) + ArrayRef Inputs, ArrayRef Outputs) : Source(Source), Creator(Creator), ResponseSupport(ResponseSupport), Executable(Executable), Arguments(Arguments) { for (const auto &II : Inputs) if (II.isFilename()) InputFilenames.push_back(II.getFilename()); + for (const auto &II : Outputs) + if (II.isFilename()) + OutputFilenames.push_back(II.getFilename()); } /// Check if the compiler flag in question should be skipped when @@ -357,8 +360,9 @@ CC1Command::CC1Command(const Action &Source, const Tool &Creator, ResponseFileSupport ResponseSupport, const char *Executable, const llvm::opt::ArgStringList &Arguments, - ArrayRef Inputs) - : Command(Source, Creator, ResponseSupport, Executable, Arguments, Inputs) { + ArrayRef Inputs, ArrayRef Outputs) + : Command(Source, Creator, ResponseSupport, Executable, Arguments, Inputs, + Outputs) { InProcess = true; } @@ -415,9 +419,10 @@ FallbackCommand::FallbackCommand(const Action &Source_, const Tool &Creator_, const char *Executable_, const llvm::opt::ArgStringList &Arguments_, ArrayRef Inputs, + ArrayRef Outputs, std::unique_ptr Fallback_) : Command(Source_, Creator_, ResponseSupport, Executable_, Arguments_, - Inputs), + Inputs, Outputs), Fallback(std::move(Fallback_)) {} void FallbackCommand::Print(raw_ostream &OS, const char *Terminator, @@ -456,9 +461,10 @@ int FallbackCommand::Execute(ArrayRef> Redirects, ForceSuccessCommand::ForceSuccessCommand( const Action &Source_, const Tool &Creator_, ResponseFileSupport ResponseSupport, const char *Executable_, - const llvm::opt::ArgStringList &Arguments_, ArrayRef Inputs) + const 
llvm::opt::ArgStringList &Arguments_, ArrayRef Inputs, + ArrayRef Outputs) : Command(Source_, Creator_, ResponseSupport, Executable_, Arguments_, - Inputs) {} + Inputs, Outputs) {} void ForceSuccessCommand::Print(raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo) const { diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 351b34e8bf90f..b833ebaebf925 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -71,7 +71,7 @@ void aix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -170,7 +170,7 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } /// AIX - AIX tool chain which can call as(1) and ld(1) directly. diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index c6087156642b2..6781045886f20 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -356,9 +356,9 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-shared"); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); - C.addCommand( - std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), - Args.MakeArgString(Linker), CmdArgs, Inputs)); + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), + CmdArgs, Inputs, Output)); } void amdgpu::getAMDGPUTargetFeatures(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index 092bade53c635..02b745c6a2056 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -142,9 +142,9 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(std::string("-m") + *FamilyName)); } - C.addCommand( - std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), - Args.MakeArgString(Linker), CmdArgs, Inputs)); + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), + CmdArgs, Inputs, Output)); } llvm::Optional AVRToolChain::findAVRLibcInstallation() const { diff --git a/clang/lib/Driver/ToolChains/Ananas.cpp b/clang/lib/Driver/ToolChains/Ananas.cpp index a4141a57acccb..e5e33fe24874e 100644 --- a/clang/lib/Driver/ToolChains/Ananas.cpp +++ b/clang/lib/Driver/ToolChains/Ananas.cpp @@ -39,8 +39,9 @@ void ananas::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void ananas::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -124,8 +125,9 @@ void ananas::Linker::ConstructJob(Compilation &C, const JobAction &JA, } const char *Exec = 
Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } // Ananas - Ananas tool chain which can call as(1) and ld(1) directly. diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 61839a9e31b0b..6ed81c1e34a12 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -202,5 +202,5 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), Args.MakeArgString(TC.GetLinkerPath()), - CmdArgs, Inputs)); + CmdArgs, Inputs, Output)); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 630b39d1e769e..b37dcfee1a3ec 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4356,9 +4356,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, II.getInputArg().renderAsInput(Args, CmdArgs); } - C.addCommand( - std::make_unique(JA, *this, ResponseFileSupport::AtFileUTF8(), - D.getClangProgramPath(), CmdArgs, Inputs)); + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::AtFileUTF8(), D.getClangProgramPath(), + CmdArgs, Inputs, Output)); return; } @@ -6314,20 +6314,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, getCLFallback()->GetCommand(C, JA, Output, Inputs, Args, LinkingOutput); C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs, - std::move(CLCommand))); + Output, std::move(CLCommand))); } else if (Args.hasArg(options::OPT__SLASH_fallback) && isa(JA)) { // In /fallback builds, run the main compilation even if the pch generation // fails, so that the main compilation's fallback to cl.exe runs. C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs, + Output)); } else if (D.CC1Main && !D.CCGenDiagnostics) { // Invoke the CC1 directly in this process - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } else { - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } // Make the compile command echo its inputs for /showFilenames. 
@@ -7074,8 +7077,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Input.getFilename()); const char *Exec = getToolChain().getDriver().getClangProgramPath(); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } // Begin OffloadBundler @@ -7161,7 +7165,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::None(), TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, None)); + CmdArgs, None, Output)); } void OffloadBundler::ConstructJobMultipleOutputs( @@ -7227,7 +7231,7 @@ void OffloadBundler::ConstructJobMultipleOutputs( C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::None(), TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, None)); + CmdArgs, None, Outputs)); } void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, @@ -7257,5 +7261,5 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::None(), Args.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, Inputs)); + CmdArgs, Inputs, Output)); } diff --git a/clang/lib/Driver/ToolChains/CloudABI.cpp b/clang/lib/Driver/ToolChains/CloudABI.cpp index 8dcfd4951bbfe..3efca8776260a 100644 --- a/clang/lib/Driver/ToolChains/CloudABI.cpp +++ b/clang/lib/Driver/ToolChains/CloudABI.cpp @@ -92,8 +92,9 @@ void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o"))); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } // CloudABI - CloudABI tool chain which can call ld(1) directly. diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index e3723e213c52f..692d0600bad35 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -951,12 +951,13 @@ void tools::SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T, InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename()); // First extract the dwo sections. - C.addCommand(std::make_unique( - JA, T, ResponseFileSupport::AtFileCurCP(), Exec, ExtractArgs, II)); + C.addCommand(std::make_unique(JA, T, + ResponseFileSupport::AtFileCurCP(), + Exec, ExtractArgs, II, Output)); // Then remove them from the original .o file. C.addCommand(std::make_unique( - JA, T, ResponseFileSupport::AtFileCurCP(), Exec, StripArgs, II)); + JA, T, ResponseFileSupport::AtFileCurCP(), Exec, StripArgs, II, Output)); } // Claim options we don't want to warn if they are unused. 
We do this for diff --git a/clang/lib/Driver/ToolChains/CrossWindows.cpp b/clang/lib/Driver/ToolChains/CrossWindows.cpp index 127a8a5f24cce..28ad6c59c655c 100644 --- a/clang/lib/Driver/ToolChains/CrossWindows.cpp +++ b/clang/lib/Driver/ToolChains/CrossWindows.cpp @@ -58,7 +58,7 @@ void tools::CrossWindows::Assembler::ConstructJob( Exec = Args.MakeArgString(Assembler); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void tools::CrossWindows::Linker::ConstructJob( @@ -203,8 +203,9 @@ void tools::CrossWindows::Linker::ConstructJob( Exec = Args.MakeArgString(TC.GetLinkerPath()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } CrossWindowsToolChain::CrossWindowsToolChain(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index d7933534a5d3d..217a0155a52d3 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -427,7 +427,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, JA, *this, ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8, "--options-file"}, - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) { @@ -496,7 +496,7 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, JA, *this, ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8, "--options-file"}, - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, @@ -577,7 +577,7 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, JA, *this, ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8, "--options-file"}, - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } /// CUDA toolchain.
Our assembler is ptxas, and our "linker" is fatbinary, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 8f2be2a343cc5..0d9e471ec0709 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -149,7 +149,7 @@ void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void darwin::MachOTool::anchor() {} @@ -522,7 +522,7 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString(getToolChain().GetProgramPath("touch")); CmdArgs.push_back(Output.getFilename()); C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None)); + JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None, Output)); return; } @@ -695,7 +695,7 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); std::unique_ptr<Command> Cmd = std::make_unique<Command>( - JA, *this, ResponseSupport, Exec, CmdArgs, Inputs); + JA, *this, ResponseSupport, Exec, CmdArgs, Inputs, Output); Cmd->setInputFileList(std::move(InputFileList)); C.addCommand(std::move(Cmd)); } @@ -720,7 +720,7 @@ void darwin::Lipo::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("lipo")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA, @@ -741,7 +741,7 @@ void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("dsymutil")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA, @@ -765,7 +765,7 @@ void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } MachO::MachO(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) diff --git a/clang/lib/Driver/ToolChains/DragonFly.cpp b/clang/lib/Driver/ToolChains/DragonFly.cpp index 88dd0c899d8a8..08176e507eed0 100644 --- a/clang/lib/Driver/ToolChains/DragonFly.cpp +++ b/clang/lib/Driver/ToolChains/DragonFly.cpp @@ -45,8 +45,9 @@ void dragonfly::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -170,8 +171,9 @@ void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, getToolChain().addProfileRTLibs(Args, CmdArgs); const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(std::make_unique<Command>( - JA, *this,
ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } /// DragonFly - DragonFly tool chain which can call as(1) and ld(1) directly. diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 93401c6626630..f8633b988faa5 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -72,8 +72,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, // TODO: Replace flang-new with flang once the new driver replaces the // throwaway driver const char *Exec = Args.MakeArgString(D.GetProgramPath("flang-new", TC)); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } Flang::Flang(const ToolChain &TC) : Tool("flang-new", "flang frontend", TC) {} diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index 909ac5e992129..5854defca4881 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -128,8 +128,9 @@ void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -359,8 +360,9 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, ToolChain.addProfileRTLibs(Args, CmdArgs); const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } /// FreeBSD - FreeBSD tool chain which can call as(1) and ld(1) directly. diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 781179be39a36..79d3a8d554ded 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -167,7 +167,7 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA, } C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } /// Fuchsia - Fuchsia tool chain which can call as(1) and ld(1) directly.
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index f3843685a522b..7d75e90c6092f 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -171,8 +171,9 @@ void tools::gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, GCCName = "gcc"; const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName)); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void tools::gcc::Preprocessor::RenderExtraToolArgs( @@ -364,8 +365,9 @@ void tools::gnutools::StaticLibTool::ConstructJob( } const char *Exec = Args.MakeArgString(getToolChain().GetStaticLibToolPath()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -662,8 +664,9 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_T); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void tools::gnutools::Assembler::ConstructJob(Compilation &C, @@ -930,8 +933,9 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(DefaultAssembler)); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); // Handle the debug info splitting at object creation time if we're // creating an object.
diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 4d1e0f9f2fdfc..a06835eee0243 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -98,7 +98,7 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, LldArgs.push_back(Input.getFilename()); const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Lld, LldArgs, Inputs)); + Lld, LldArgs, Inputs, Output)); } // Construct a clang-offload-bundler command to bundle code objects for @@ -127,14 +127,16 @@ void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg)); BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg)); - auto BundlerOutputArg = Args.MakeArgString( - std::string("-outputs=").append(std::string(OutputFileName))); + std::string Output = std::string(OutputFileName); + auto BundlerOutputArg = + Args.MakeArgString(std::string("-outputs=").append(Output)); BundlerArgs.push_back(BundlerOutputArg); const char *Bundler = Args.MakeArgString( T.getToolChain().GetProgramPath("clang-offload-bundler")); - C.addCommand(std::make_unique<Command>(JA, T, ResponseFileSupport::None(), - Bundler, BundlerArgs, Inputs)); + C.addCommand(std::make_unique<Command>( + JA, T, ResponseFileSupport::None(), Bundler, BundlerArgs, Inputs, + InputInfo(&JA, Args.MakeArgString(Output)))); } /// Add Generated HIP Object File which has device images embedded into the @@ -205,7 +207,7 @@ void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary( McinFile, "--filetype=obj"}; const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Mc, McArgs, Inputs)); + Mc, McArgs, Inputs, Output)); } // For amdgcn the inputs of the linker job are device bitcode and output is diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 775f6e1094fa6..fb54f73bcd4c8 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -189,8 +189,9 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, } auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName)); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void hexagon::Linker::RenderExtraToolArgs(const JobAction &JA, @@ -407,8 +408,9 @@ void hexagon::Linker::ConstructJob(Compilation &C, const JobAction &JA, LinkingOutput); const char *Exec = Args.MakeArgString(HTC.GetLinkerPath()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } // Hexagon tools end.
diff --git a/clang/lib/Driver/ToolChains/InterfaceStubs.cpp b/clang/lib/Driver/ToolChains/InterfaceStubs.cpp index f7c11421e8094..57acf338df5c4 100644 --- a/clang/lib/Driver/ToolChains/InterfaceStubs.cpp +++ b/clang/lib/Driver/ToolChains/InterfaceStubs.cpp @@ -56,7 +56,7 @@ void Merger::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), Args.MakeArgString(Merger), CmdArgs, - Inputs)); + Inputs, Output)); } } // namespace ifstool } // namespace tools diff --git a/clang/lib/Driver/ToolChains/MSP430.cpp b/clang/lib/Driver/ToolChains/MSP430.cpp index 6d663e4909e59..f3ed9967a81a1 100644 --- a/clang/lib/Driver/ToolChains/MSP430.cpp +++ b/clang/lib/Driver/ToolChains/MSP430.cpp @@ -312,7 +312,7 @@ void msp430::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_T); - C.addCommand( - std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(), - Args.MakeArgString(Linker), CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>( + JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), + CmdArgs, Inputs, Output)); } diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 7faccdff6beed..ba2c7146b924e 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -606,9 +606,9 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, linkPath = TC.GetProgramPath(Linker.str().c_str()); } - auto LinkCmd = - std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileUTF16(), - Args.MakeArgString(linkPath), CmdArgs, Inputs); + auto LinkCmd = std::make_unique<Command>( + JA, *this, ResponseFileSupport::AtFileUTF16(), + Args.MakeArgString(linkPath), CmdArgs, Inputs, Output); if (!Environment.empty()) LinkCmd->setEnvironment(Environment); C.addCommand(std::move(LinkCmd)); @@ -748,9 +748,9 @@ std::unique_ptr<Command> visualstudio::Compiler::GetCommand( CmdArgs.push_back(Fo); std::string Exec = FindVisualStudioExecutable(getToolChain(), "cl.exe"); - return std::make_unique<Command>(JA, *this, - ResponseFileSupport::AtFileUTF16(), - Args.MakeArgString(Exec), CmdArgs, Inputs); + return std::make_unique<Command>( + JA, *this, ResponseFileSupport::AtFileUTF16(), Args.MakeArgString(Exec), + CmdArgs, Inputs, Output); } MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple &Triple, diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index 4267af60bf031..49fef4298bfec 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -51,7 +51,7 @@ void tools::MinGW::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); if (Args.hasArg(options::OPT_gsplit_dwarf)) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, @@ -167,9 +167,10 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, // that lacks an extension. // GCC used to do this only when the compiler itself runs on windows, but // since GCC 8 it does the same when cross compiling as well.
- if (!llvm::sys::path::has_extension(OutputFile)) + if (!llvm::sys::path::has_extension(OutputFile)) { CmdArgs.push_back(Args.MakeArgString(Twine(OutputFile) + ".exe")); - else + OutputFile = CmdArgs.back(); + } else CmdArgs.push_back(OutputFile); Args.AddAllArgs(CmdArgs, options::OPT_e); @@ -318,8 +319,9 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } const char *Exec = Args.MakeArgString(TC.GetLinkerPath()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } // Simplified from Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple. diff --git a/clang/lib/Driver/ToolChains/Minix.cpp b/clang/lib/Driver/ToolChains/Minix.cpp index d0314795620ce..44479a24ebe78 100644 --- a/clang/lib/Driver/ToolChains/Minix.cpp +++ b/clang/lib/Driver/ToolChains/Minix.cpp @@ -36,8 +36,9 @@ void tools::minix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void tools::minix::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -89,8 +90,9 @@ void tools::minix::Linker::ConstructJob(Compilation &C, const JobAction &JA, } const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } /// Minix - Minix tool chain which can call as(1) and ld(1) directly.
diff --git a/clang/lib/Driver/ToolChains/Myriad.cpp b/clang/lib/Driver/ToolChains/Myriad.cpp index 84fe4748b6faf..ab0df5d8f1683 100644 --- a/clang/lib/Driver/ToolChains/Myriad.cpp +++ b/clang/lib/Driver/ToolChains/Myriad.cpp @@ -79,7 +79,7 @@ void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString(getToolChain().GetProgramPath("moviCompile")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), Args.MakeArgString(Exec), CmdArgs, - Inputs)); + Inputs, Output)); } void tools::SHAVE::Assembler::ConstructJob(Compilation &C, const JobAction &JA, @@ -115,7 +115,7 @@ void tools::SHAVE::Assembler::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString(getToolChain().GetProgramPath("moviAsm")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), Args.MakeArgString(Exec), CmdArgs, - Inputs)); + Inputs, Output)); } void tools::Myriad::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -200,9 +200,9 @@ void tools::Myriad::Linker::ConstructJob(Compilation &C, const JobAction &JA, std::string Exec = Args.MakeArgString(TC.GetProgramPath("sparc-myriad-rtems-ld")); - C.addCommand( - std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(), - Args.MakeArgString(Exec), CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>( + JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Exec), + CmdArgs, Inputs, Output)); } MyriadToolChain::MyriadToolChain(const Driver &D, const llvm::Triple &Triple, diff --git a/clang/lib/Driver/ToolChains/NaCl.cpp b/clang/lib/Driver/ToolChains/NaCl.cpp index 15a773675299a..8a150c3947532 100644 --- a/clang/lib/Driver/ToolChains/NaCl.cpp +++ b/clang/lib/Driver/ToolChains/NaCl.cpp @@ -193,8 +193,9 @@ void nacltools::Linker::ConstructJob(Compilation &C, const JobAction &JA, } const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } /// NaCl Toolchain diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp index 253ee6ce0f721..48bf061c6650d 100644 --- a/clang/lib/Driver/ToolChains/NetBSD.cpp +++ b/clang/lib/Driver/ToolChains/NetBSD.cpp @@ -103,8 +103,9 @@ void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString((getToolChain().GetProgramPath("as"))); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -338,8 +339,9 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, ToolChain.addProfileRTLibs(Args, CmdArgs); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } /// NetBSD - NetBSD tool chain which can call as(1) and ld(1) directly.
diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index 5ca2fa0850e63..f155d74632f93 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -82,8 +82,9 @@ void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -221,8 +222,9 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, } const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } SanitizerMask OpenBSD::getSupportedSanitizers() const { diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 6dc81899cbaac..fab1b2ac62dfd 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -66,8 +66,9 @@ void tools::PS4cpu::Assemble::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("orbis-as")); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } static void AddPS4SanitizerArgs(const ToolChain &TC, ArgStringList &CmdArgs) { @@ -152,8 +153,9 @@ void tools::PS4cpu::Link::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld")); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } toolchains::PS4CPU::PS4CPU(const Driver &D, const llvm::Triple &Triple, diff --git a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp index cc912d94cb92f..312c8b52c5e83 100644 --- a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp +++ b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp @@ -191,8 +191,8 @@ void RISCV::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); - C.addCommand( - std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(), - Args.MakeArgString(Linker), CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>( + JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), + CmdArgs, Inputs, Output)); } // RISCV tools end.
diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index b8fdc87478bc6..4ed4d839ad106 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -42,7 +42,7 @@ void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -152,7 +152,7 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } static StringRef getSolarisLibSuffix(const llvm::Triple &Triple) { diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index d953082470aab..6b654886e7746 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -114,8 +114,9 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Linker, CmdArgs, Inputs)); + C.addCommand(std::make_unique<Command>(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Linker, CmdArgs, Inputs, Output)); // When optimizing, if wasm-opt is available, run it. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { @@ -139,7 +140,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Output.getFilename()); C.addCommand(std::make_unique<Command>( JA, *this, ResponseFileSupport::AtFileCurCP(), WasmOpt, CmdArgs, - Inputs)); + Inputs, Output)); } } } diff --git a/clang/lib/Driver/ToolChains/XCore.cpp b/clang/lib/Driver/ToolChains/XCore.cpp index 5030c73c7d825..5f94f83d36919 100644 --- a/clang/lib/Driver/ToolChains/XCore.cpp +++ b/clang/lib/Driver/ToolChains/XCore.cpp @@ -53,7 +53,7 @@ void tools::XCore::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void tools::XCore::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -82,7 +82,7 @@ void tools::XCore::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } /// XCore tool chain diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp index 67bf545b14e4b..227f7c76b8a1e 100644 --- a/clang/unittests/Driver/ToolChainTest.cpp +++ b/clang/unittests/Driver/ToolChainTest.cpp @@ -259,4 +259,34 @@ TEST(ToolChainTest, GetTargetAndMode) { EXPECT_STREQ(Res.DriverMode, "--driver-mode=cl"); EXPECT_FALSE(Res.TargetIsValid); } + +TEST(ToolChainTest, CommandOutput) { + IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); + + IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs()); + struct TestDiagnosticConsumer : public DiagnosticConsumer {}; + DiagnosticsEngine Diags(DiagID, &*DiagOpts, new
TestDiagnosticConsumer); + IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem( + new llvm::vfs::InMemoryFileSystem); + + Driver CCDriver("/home/test/bin/clang", "arm-linux-gnueabi", Diags, + "clang LLVM compiler", InMemoryFileSystem); + CCDriver.setCheckInputsExist(false); + std::unique_ptr<Compilation> CC( + CCDriver.BuildCompilation({"/home/test/bin/clang", "foo.cpp"})); + const JobList &Jobs = CC->getJobs(); + + const auto &CmdCompile = Jobs.getJobs().front(); + const auto &InFile = CmdCompile->getInputFilenames().front(); + EXPECT_STREQ(InFile, "foo.cpp"); + auto ObjFile = CmdCompile->getOutputFilenames().front(); + EXPECT_TRUE(StringRef(ObjFile).endswith(".o")); + + const auto &CmdLink = Jobs.getJobs().back(); + const auto LinkInFile = CmdLink->getInputFilenames().front(); + EXPECT_EQ(ObjFile, LinkInFile); + auto ExeFile = CmdLink->getOutputFilenames().front(); + EXPECT_EQ("a.out", ExeFile); +} + } // end anonymous namespace. From 47d007e74fdf77c3b95649a2c17658a6f3f50e4b Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 5 Oct 2020 13:01:48 +0300 Subject: [PATCH 314/321] Refactor OCLBuiltinFuncMangleInfo The most notable change is that usages of `std::string` were replaced with `llvm::StringRef` to simplify the code and probably speed it up a little. --- llvm-spirv/lib/SPIRV/OCLUtil.cpp | 348 ++++++++++++++----------------- 1 file changed, 158 insertions(+), 190 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.cpp b/llvm-spirv/lib/SPIRV/OCLUtil.cpp index ec6ab317bd8e6..14a137dc5f62a 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.cpp +++ b/llvm-spirv/lib/SPIRV/OCLUtil.cpp @@ -965,33 +965,42 @@ class OCLBuiltinFuncMangleInfo : public SPIRV::BuiltinFuncMangleInfo { OCLBuiltinFuncMangleInfo(ArrayRef<Type *> ArgTypes) : ArgTypes(ArgTypes.vec()) {} void init(StringRef UniqName) override { - UnmangledName = UniqName.str(); - size_t Pos = std::string::npos; + // Make a local copy as we will modify the string in the init function + std::string TempStorage = UniqName.str(); + auto NameRef = StringRef(TempStorage); - auto EraseSubstring = [](std::string &Str, std::string ToErase) { - size_t Pos = Str.find(ToErase); + // Helper functions to erase substrings from NameRef (i.e.
TempStorage) + auto EraseSubstring = [&NameRef, &TempStorage](const std::string &ToErase) { + size_t Pos = TempStorage.find(ToErase); if (Pos != std::string::npos) { - Str.erase(Pos, ToErase.length()); + TempStorage.erase(Pos, ToErase.length()); + // re-take StringRef as TempStorage was updated + NameRef = StringRef(TempStorage); } }; + auto EraseSymbol = [&NameRef, &TempStorage](size_t Index) { + TempStorage.erase(Index, 1); + // re-take StringRef as TempStorage was updated + NameRef = StringRef(TempStorage); + }; - if (UnmangledName.find("async_work_group") == 0) { + if (NameRef.startswith("async_work_group")) { addUnsignedArg(-1); setArgAttr(1, SPIR::ATTR_CONST); - } else if (UnmangledName.find("write_imageui") == 0) + } else if (NameRef.startswith("write_imageui")) addUnsignedArg(2); - else if (UnmangledName == "prefetch") { + else if (NameRef.equals("prefetch")) { addUnsignedArg(1); setArgAttr(0, SPIR::ATTR_CONST); - } else if (UnmangledName == "get_kernel_work_group_size" || - UnmangledName == - "get_kernel_preferred_work_group_size_multiple") { + } else if (NameRef.equals("get_kernel_work_group_size") || + NameRef.equals( + "get_kernel_preferred_work_group_size_multiple")) { assert(F && "lack of necessary information"); const size_t BlockArgIdx = 0; FunctionType *InvokeTy = getBlockInvokeTy(F, BlockArgIdx); if (InvokeTy->getNumParams() > 1) setLocalArgBlock(BlockArgIdx); - } else if (UnmangledName == "enqueue_kernel") { + } else if (NameRef.equals("enqueue_kernel")) { assert(F && "lack of necessary information"); setEnumArg(1, SPIR::PRIMITIVE_KERNEL_ENQUEUE_FLAGS_T); addUnsignedArg(3); @@ -1005,97 +1014,84 @@ class OCLBuiltinFuncMangleInfo : public SPIRV::BuiltinFuncMangleInfo { addUnsignedArg(BlockArgIdx + 1); setVarArg(BlockArgIdx + 2); } - } else if (UnmangledName.find("get_") == 0 || UnmangledName == "nan" || - UnmangledName == "mem_fence" || - UnmangledName.find("shuffle") == 0) { + } else if (NameRef.startswith("get_") || NameRef.equals("nan") || + NameRef.equals("mem_fence") || NameRef.startswith("shuffle")) { addUnsignedArg(-1); - if (UnmangledName.find(kOCLBuiltinName::GetFence) == 0) { + if (NameRef.startswith(kOCLBuiltinName::GetFence)) { setArgAttr(0, SPIR::ATTR_CONST); addVoidPtrArg(0); } - } else if (UnmangledName.find("barrier") != std::string::npos) { + } else if (NameRef.contains("barrier")) { addUnsignedArg(0); - if (UnmangledName == "work_group_barrier" || - UnmangledName == "sub_group_barrier") + if (NameRef.equals("work_group_barrier") || + NameRef.equals("sub_group_barrier")) setEnumArg(1, SPIR::PRIMITIVE_MEMORY_SCOPE); - } else if (UnmangledName.find("atomic_work_item_fence") == 0) { + } else if (NameRef.startswith("atomic_work_item_fence")) { addUnsignedArg(0); setEnumArg(1, SPIR::PRIMITIVE_MEMORY_ORDER); setEnumArg(2, SPIR::PRIMITIVE_MEMORY_SCOPE); - } else if (UnmangledName.find("atom_") == 0) { + } else if (NameRef.startswith("atom_")) { setArgAttr(0, SPIR::ATTR_VOLATILE); - if (UnmangledName.find("atom_umax") == 0 || - UnmangledName.find("atom_umin") == 0) { - addUnsignedArg(0); - addUnsignedArg(1); - UnmangledName.erase(5, 1); + if (NameRef.endswith("_umax") || NameRef.endswith("_umin")) { + addUnsignedArg(-1); + // We need to remove u to match OpenCL C built-in function name + EraseSymbol(5); } - } else if (UnmangledName.find("atomic") == 0) { + } else if (NameRef.startswith("atomic")) { setArgAttr(0, SPIR::ATTR_VOLATILE); - if (UnmangledName.find("atomic_umax") == 0 || - UnmangledName.find("atomic_umin") == 0) { - addUnsignedArg(0); - 
addUnsignedArg(1); - UnmangledName.erase(7, 1); - } else if (UnmangledName.find("atomic_fetch_umin") == 0 || - UnmangledName.find("atomic_fetch_umax") == 0) { - addUnsignedArg(0); - addUnsignedArg(1); - UnmangledName.erase(13, 1); + if (NameRef.contains("_umax") || NameRef.contains("_umin")) { + addUnsignedArg(-1); + // We need to remove u to match OpenCL C built-in function name + if (NameRef.contains("_fetch")) + EraseSymbol(13); + else + EraseSymbol(7); } - if (UnmangledName.find("store_explicit") != std::string::npos || - UnmangledName.find("exchange_explicit") != std::string::npos || - (UnmangledName.find("atomic_fetch") == 0 && - UnmangledName.find("explicit") != std::string::npos)) { + if (NameRef.contains("store_explicit") || + NameRef.contains("exchange_explicit") || + (NameRef.startswith("atomic_fetch") && + NameRef.contains("explicit"))) { setEnumArg(2, SPIR::PRIMITIVE_MEMORY_ORDER); setEnumArg(3, SPIR::PRIMITIVE_MEMORY_SCOPE); - } else if (UnmangledName.find("load_explicit") != std::string::npos || - (UnmangledName.find("atomic_flag") == 0 && - UnmangledName.find("explicit") != std::string::npos)) { + } else if (NameRef.contains("load_explicit") || + (NameRef.startswith("atomic_flag") && + NameRef.contains("explicit"))) { setEnumArg(1, SPIR::PRIMITIVE_MEMORY_ORDER); setEnumArg(2, SPIR::PRIMITIVE_MEMORY_SCOPE); - } else if (UnmangledName.find("compare_exchange_strong_explicit") != - std::string::npos || - UnmangledName.find("compare_exchange_weak_explicit") != - std::string::npos) { + } else if (NameRef.endswith("compare_exchange_strong_explicit") || + NameRef.endswith("compare_exchange_weak_explicit")) { setEnumArg(3, SPIR::PRIMITIVE_MEMORY_ORDER); setEnumArg(4, SPIR::PRIMITIVE_MEMORY_ORDER); setEnumArg(5, SPIR::PRIMITIVE_MEMORY_SCOPE); } // Don't set atomic property to the first argument of 1.2 atomic // built-ins. 
- if (UnmangledName.find("atomic_add") != 0 && - UnmangledName.find("atomic_sub") != 0 && - UnmangledName.find("atomic_xchg") != 0 && - UnmangledName.find("atomic_inc") != 0 && - UnmangledName.find("atomic_dec") != 0 && - UnmangledName.find("atomic_cmpxchg") != 0 && - UnmangledName.find("atomic_min") != 0 && - UnmangledName.find("atomic_max") != 0 && - UnmangledName.find("atomic_and") != 0 && - UnmangledName.find("atomic_or") != 0 && - UnmangledName.find("atomic_xor") != 0 && - UnmangledName.find("atom_") != 0) { + if (!NameRef.endswith("xchg") && // covers _cmpxchg too + (NameRef.contains("fetch") || + !(NameRef.endswith("_add") || NameRef.endswith("_sub") || + NameRef.endswith("_inc") || NameRef.endswith("_dec") || + NameRef.endswith("_min") || NameRef.endswith("_max") || + NameRef.endswith("_and") || NameRef.endswith("_or") || + NameRef.endswith("_xor")))) { addAtomicArg(0); } - - } else if (UnmangledName.find("uconvert_") == 0) { + } else if (NameRef.startswith("uconvert_")) { addUnsignedArg(0); + NameRef = NameRef.drop_front(1); UnmangledName.erase(0, 1); - } else if (UnmangledName.find("s_") == 0) { - if (UnmangledName == "s_upsample") + } else if (NameRef.startswith("s_")) { + if (NameRef.equals("s_upsample")) addUnsignedArg(1); - UnmangledName.erase(0, 2); - } else if (UnmangledName.find("u_") == 0) { + NameRef = NameRef.drop_front(2); + } else if (NameRef.startswith("u_")) { addUnsignedArg(-1); - UnmangledName.erase(0, 2); - } else if (UnmangledName == "fclamp") { - UnmangledName.erase(0, 1); + NameRef = NameRef.drop_front(2); + } else if (NameRef.equals("fclamp")) { + NameRef = NameRef.drop_front(1); } // handle [read|write]pipe builtins (plus two i32 literal args // required by SPIR 2.0 provisional specification): - else if (UnmangledName == "read_pipe_2" || - UnmangledName == "write_pipe_2") { + else if (NameRef.equals("read_pipe_2") || NameRef.equals("write_pipe_2")) { // with 2 arguments (plus two i32 literals): // int read_pipe (read_only pipe gentype p, gentype *ptr) // int write_pipe (write_only pipe gentype p, const gentype *ptr) @@ -1103,16 +1099,16 @@ class OCLBuiltinFuncMangleInfo : public SPIRV::BuiltinFuncMangleInfo { addUnsignedArg(2); addUnsignedArg(3); // OpenCL-like representation of blocking pipes - } else if (UnmangledName == "read_pipe_2_bl" || - UnmangledName == "write_pipe_2_bl") { + } else if (NameRef.equals("read_pipe_2_bl") || + NameRef.equals("write_pipe_2_bl")) { // with 2 arguments (plus two i32 literals): // int read_pipe_bl (read_only pipe gentype p, gentype *ptr) // int write_pipe_bl (write_only pipe gentype p, const gentype *ptr) addVoidPtrArg(1); addUnsignedArg(2); addUnsignedArg(3); - } else if (UnmangledName == "read_pipe_4" || - UnmangledName == "write_pipe_4") { + } else if (NameRef.equals("read_pipe_4") || + NameRef.equals("write_pipe_4")) { // with 4 arguments (plus two i32 literals): // int read_pipe (read_only pipe gentype p, reserve_id_t reserve_id, uint // index, gentype *ptr) int write_pipe (write_only pipe gentype p, @@ -1121,163 +1117,138 @@ class OCLBuiltinFuncMangleInfo : public SPIRV::BuiltinFuncMangleInfo { addVoidPtrArg(3); addUnsignedArg(4); addUnsignedArg(5); - } else if (UnmangledName.find("reserve_read_pipe") != std::string::npos || - UnmangledName.find("reserve_write_pipe") != std::string::npos) { + } else if (NameRef.contains("reserve_read_pipe") || + NameRef.contains("reserve_write_pipe")) { // process [|work_group|sub_group]reserve[read|write]pipe builtins addUnsignedArg(1); addUnsignedArg(2); addUnsignedArg(3); - } else 
if (UnmangledName.find("commit_read_pipe") != std::string::npos || - UnmangledName.find("commit_write_pipe") != std::string::npos) { + } else if (NameRef.contains("commit_read_pipe") || + NameRef.contains("commit_write_pipe")) { // process [|work_group|sub_group]commit[read|write]pipe builtins addUnsignedArg(2); addUnsignedArg(3); - } else if (UnmangledName == "capture_event_profiling_info") { + } else if (NameRef.equals("capture_event_profiling_info")) { addVoidPtrArg(2); setEnumArg(1, SPIR::PRIMITIVE_CLK_PROFILING_INFO); - } else if (UnmangledName == "enqueue_marker") { + } else if (NameRef.equals("enqueue_marker")) { setArgAttr(2, SPIR::ATTR_CONST); addUnsignedArg(1); - } else if (UnmangledName.find("vload") == 0) { + } else if (NameRef.startswith("vload")) { addUnsignedArg(0); setArgAttr(1, SPIR::ATTR_CONST); - } else if (UnmangledName.find("vstore") == 0) { + } else if (NameRef.startswith("vstore")) { addUnsignedArg(1); - } else if (UnmangledName.find("ndrange_") == 0) { + } else if (NameRef.startswith("ndrange_")) { addUnsignedArg(-1); - if (UnmangledName[8] == '2' || UnmangledName[8] == '3') { + if (NameRef[8] == '2' || NameRef[8] == '3') { setArgAttr(-1, SPIR::ATTR_CONST); } - } else if ((Pos = UnmangledName.find("umax")) != std::string::npos || - (Pos = UnmangledName.find("umin")) != std::string::npos) { + } else if (NameRef.contains("umax")) { + addUnsignedArg(-1); + EraseSymbol(NameRef.find("umax")); + } else if (NameRef.contains("umin")) { addUnsignedArg(-1); - UnmangledName.erase(Pos, 1); - } else if (UnmangledName.find("broadcast") != std::string::npos) { + EraseSymbol(NameRef.find("umin")); + } else if (NameRef.contains("broadcast")) { addUnsignedArg(-1); - } else if (UnmangledName.find(kOCLBuiltinName::SampledReadImage) == 0) { - UnmangledName.erase(0, strlen(kOCLBuiltinName::Sampled)); + } else if (NameRef.startswith(kOCLBuiltinName::SampledReadImage)) { + NameRef.consume_front(kOCLBuiltinName::Sampled); addSamplerArg(1); - } else if (UnmangledName.find(kOCLSubgroupsAVCIntel::Prefix) != - std::string::npos) { - if (UnmangledName.find("evaluate_ipe") != std::string::npos) + } else if (NameRef.contains(kOCLSubgroupsAVCIntel::Prefix)) { + if (NameRef.contains("evaluate_ipe")) addSamplerArg(1); - else if (UnmangledName.find("evaluate_with_single_reference") != - std::string::npos) + else if (NameRef.contains("evaluate_with_single_reference")) addSamplerArg(2); - else if (UnmangledName.find("evaluate_with_multi_reference") != - std::string::npos) { + else if (NameRef.contains("evaluate_with_multi_reference")) { addUnsignedArg(1); std::string PostFix = "_interlaced"; - if (UnmangledName.find(PostFix) != std::string::npos) { + if (NameRef.contains(PostFix)) { addUnsignedArg(2); addSamplerArg(3); - size_t Pos = UnmangledName.find(PostFix); - if (Pos != std::string::npos) - UnmangledName.erase(Pos, PostFix.length()); + EraseSubstring(PostFix); } else addSamplerArg(2); - } else if (UnmangledName.find("evaluate_with_dual_reference") != - std::string::npos) + } else if (NameRef.contains("evaluate_with_dual_reference")) addSamplerArg(3); - else if (UnmangledName.find("fme_initialize") != std::string::npos) + else if (NameRef.contains("fme_initialize")) addUnsignedArgs(0, 6); - else if (UnmangledName.find("bme_initialize") != std::string::npos) + else if (NameRef.contains("bme_initialize")) addUnsignedArgs(0, 7); - else if (UnmangledName.find("set_inter_base_multi_reference_penalty") != - std::string::npos || - UnmangledName.find("set_inter_shape_penalty") != - std::string::npos || - 
UnmangledName.find("set_inter_direction_penalty") != - std::string::npos) + else if (NameRef.contains("set_inter_base_multi_reference_penalty") || + NameRef.contains("set_inter_shape_penalty") || + NameRef.contains("set_inter_direction_penalty")) addUnsignedArg(0); - else if (UnmangledName.find("set_motion_vector_cost_function") != - std::string::npos) + else if (NameRef.contains("set_motion_vector_cost_function")) addUnsignedArgs(0, 2); - else if (UnmangledName.find("interlaced_field_polarity") != - std::string::npos) + else if (NameRef.contains("interlaced_field_polarity")) addUnsignedArg(0); - else if (UnmangledName.find("interlaced_field_polarities") != - std::string::npos) + else if (NameRef.contains("interlaced_field_polarities")) addUnsignedArgs(0, 1); - else if (UnmangledName.find(kOCLSubgroupsAVCIntel::MCEPrefix) != - std::string::npos) { - if (UnmangledName.find("get_default") != std::string::npos) + else if (NameRef.contains(kOCLSubgroupsAVCIntel::MCEPrefix)) { + if (NameRef.contains("get_default")) addUnsignedArgs(0, 1); - } else if (UnmangledName.find(kOCLSubgroupsAVCIntel::IMEPrefix) != - std::string::npos) { - if (UnmangledName.find("initialize") != std::string::npos) + } else if (NameRef.contains(kOCLSubgroupsAVCIntel::IMEPrefix)) { + if (NameRef.contains("initialize")) addUnsignedArgs(0, 2); - else if (UnmangledName.find("set_single_reference") != - std::string::npos) + else if (NameRef.contains("set_single_reference")) addUnsignedArg(1); - else if (UnmangledName.find("set_dual_reference") != std::string::npos) + else if (NameRef.contains("set_dual_reference")) addUnsignedArg(2); - else if (UnmangledName.find("set_weighted_sad") != std::string::npos || - UnmangledName.find("set_early_search_termination_threshold") != - std::string::npos) + else if (NameRef.contains("set_weighted_sad") || + NameRef.contains("set_early_search_termination_threshold")) addUnsignedArg(0); - else if (UnmangledName.find("adjust_ref_offset") != std::string::npos) + else if (NameRef.contains("adjust_ref_offset")) addUnsignedArgs(1, 3); - else if (UnmangledName.find("set_max_motion_vector_count") != - std::string::npos || - UnmangledName.find("get_border_reached") != std::string::npos) + else if (NameRef.contains("set_max_motion_vector_count") || + NameRef.contains("get_border_reached")) addUnsignedArg(0); - else if (UnmangledName.find("shape_distortions") != std::string::npos || - UnmangledName.find("shape_motion_vectors") != - std::string::npos || - UnmangledName.find("shape_reference_ids") != - std::string::npos) { - if (UnmangledName.find("single_reference") != std::string::npos) { + else if (NameRef.contains("shape_distortions") || + NameRef.contains("shape_motion_vectors") || + NameRef.contains("shape_reference_ids")) { + if (NameRef.contains("single_reference")) { addUnsignedArg(1); - EraseSubstring(UnmangledName, "_single_reference"); - } else if (UnmangledName.find("dual_reference") != - std::string::npos) { + EraseSubstring("_single_reference"); + } else if (NameRef.contains("dual_reference")) { addUnsignedArgs(1, 2); - EraseSubstring(UnmangledName, "_dual_reference"); + EraseSubstring("_dual_reference"); } - } else if (UnmangledName.find("ref_window_size") != std::string::npos) + } else if (NameRef.contains("ref_window_size")) addUnsignedArg(0); - } else if (UnmangledName.find(kOCLSubgroupsAVCIntel::SICPrefix) != - std::string::npos) { - if (UnmangledName.find("initialize") != std::string::npos || - UnmangledName.find("set_intra_luma_shape_penalty") != - std::string::npos) + } else if 
(NameRef.contains(kOCLSubgroupsAVCIntel::SICPrefix)) { + if (NameRef.contains("initialize") || + NameRef.contains("set_intra_luma_shape_penalty")) addUnsignedArg(0); - else if (UnmangledName.find("configure_ipe") != std::string::npos) { - if (UnmangledName.find("_luma") != std::string::npos) { + else if (NameRef.contains("configure_ipe")) { + if (NameRef.contains("_luma")) { addUnsignedArgs(0, 6); - EraseSubstring(UnmangledName, "_luma"); + EraseSubstring("_luma"); } - if (UnmangledName.find("_chroma") != std::string::npos) { + if (NameRef.contains("_chroma")) { addUnsignedArgs(7, 9); - EraseSubstring(UnmangledName, "_chroma"); + EraseSubstring("_chroma"); } - } else if (UnmangledName.find("configure_skc") != std::string::npos) + } else if (NameRef.contains("configure_skc")) addUnsignedArgs(0, 4); - else if (UnmangledName.find("set_skc") != std::string::npos) { - if (UnmangledName.find("forward_transform_enable")) + else if (NameRef.contains("set_skc")) { + if (NameRef.contains("forward_transform_enable")) addUnsignedArg(0); - } else if (UnmangledName.find("set_block") != std::string::npos) { - if (UnmangledName.find("based_raw_skip_sad") != std::string::npos) + } else if (NameRef.contains("set_block")) { + if (NameRef.contains("based_raw_skip_sad")) addUnsignedArg(0); - } else if (UnmangledName.find("get_motion_vector_mask") != - std::string::npos) { + } else if (NameRef.contains("get_motion_vector_mask")) { addUnsignedArgs(0, 1); - } else if (UnmangledName.find("luma_mode_cost_function") != - std::string::npos) + } else if (NameRef.contains("luma_mode_cost_function")) addUnsignedArgs(0, 2); - else if (UnmangledName.find("chroma_mode_cost_function") != - std::string::npos) + else if (NameRef.contains("chroma_mode_cost_function")) addUnsignedArg(0); } - } else if (UnmangledName == "intel_sub_group_shuffle_down" || - UnmangledName == "intel_sub_group_shuffle_up") { - addUnsignedArg(2); - } else if (UnmangledName == "intel_sub_group_shuffle" || - UnmangledName == "intel_sub_group_shuffle_xor") { - addUnsignedArg(1); - } else if (UnmangledName.find("intel_sub_group_block_write") != - std::string::npos) { + } else if (NameRef.startswith("intel_sub_group_shuffle")) { + if (NameRef.endswith("_down") || NameRef.endswith("_up")) + addUnsignedArg(2); + else + addUnsignedArg(1); + } else if (NameRef.startswith("intel_sub_group_block_write")) { // distinguish write to image and other data types as position // of uint argument is different though name is the same. assert(ArgTypes.size() && "lack of necessary information"); @@ -1288,8 +1259,7 @@ class OCLBuiltinFuncMangleInfo : public SPIRV::BuiltinFuncMangleInfo { } else { addUnsignedArg(2); } - } else if (UnmangledName.find("intel_sub_group_block_read") != - std::string::npos) { + } else if (NameRef.startswith("intel_sub_group_block_read")) { // distinguish read from image and other data types as position // of uint argument is different though name is the same. 
assert(ArgTypes.size() && "lack of necessary information"); @@ -1298,26 +1268,24 @@ class OCLBuiltinFuncMangleInfo : public SPIRV::BuiltinFuncMangleInfo { setArgAttr(0, SPIR::ATTR_CONST); addUnsignedArg(0); } - } else if (UnmangledName.find("intel_sub_group_media_block_write") != - std::string::npos) { + } else if (NameRef.startswith("intel_sub_group_media_block_write")) { addUnsignedArg(3); - } else if (UnmangledName.find(kOCLBuiltinName::SubGroupPrefix) != - std::string::npos) { - if (UnmangledName.find("ballot") != std::string::npos) { - if (UnmangledName.find("inverse") != std::string::npos || - UnmangledName.find("bit_count") != std::string::npos || - UnmangledName.find("inclusive_scan") != std::string::npos || - UnmangledName.find("exclusive_scan") != std::string::npos || - UnmangledName.find("find_lsb") != std::string::npos || - UnmangledName.find("find_msb") != std::string::npos) + } else if (NameRef.startswith(kOCLBuiltinName::SubGroupPrefix)) { + if (NameRef.contains("ballot")) { + if (NameRef.contains("inverse") || NameRef.contains("bit_count") || + NameRef.contains("inclusive_scan") || + NameRef.contains("exclusive_scan") || + NameRef.contains("find_lsb") || NameRef.contains("find_msb")) addUnsignedArg(0); - else if (UnmangledName.find("bit_extract") != std::string::npos) { + else if (NameRef.contains("bit_extract")) { addUnsignedArgs(0, 1); } - } else if (UnmangledName.find("shuffle") != std::string::npos || - UnmangledName.find("clustered") != std::string::npos) + } else if (NameRef.contains("shuffle") || NameRef.contains("clustered")) addUnsignedArg(1); } + + // Store the final version of the function name + UnmangledName = NameRef.str(); } // Auxiliary information, it is expected that it is relevant at the moment // the init method is called. From 05b4cda513e148ede4c3d2eca6476e4f4749ee78 Mon Sep 17 00:00:00 2001 From: Mochalova Anastasiya Date: Wed, 7 Oct 2020 11:11:16 +0300 Subject: [PATCH 315/321] Add llvm.abs.i32 translation (#758) * Add llvm.abs.i32 intrinsic translation Add translation of @llvm.abs.* to the `s_abs` extended instruction.
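To make the mapping concrete, a minimal sketch (result and operand IDs are elided; %a stands for any i32 value): an LLVM IR call such as

    %r = call i32 @llvm.abs.i32(i32 %a, i1 false)

is emitted as an OpenCL extended instruction in SPIR-V textual form, roughly

    ExtInst <i32-type-id> <result-id> <OpenCL-ext-set-id> s_abs <id-of-%a>

which is exactly what the abs.ll test added in this patch checks for.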
Signed-off-by: amochalo --- llvm-spirv/lib/SPIRV/SPIRVUtil.cpp | 15 +++++++++++ llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 12 +++++++++ llvm-spirv/test/abs.ll | 40 ++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 llvm-spirv/test/abs.ll diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index a836ff9bc9c47..ca0a8b509fda3 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -1551,6 +1551,21 @@ bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM) { } break; } + case Intrinsic::abs: { + Type *Ty = II->getType(); + int NumElems = 1; + if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) { + NumElems = VecTy->getNumElements(); + Ty = VecTy->getElementType(); + } + if ((!Ty->isIntegerTy()) || + ((NumElems > 4) && (NumElems != 8) && (NumElems != 16))) { + BM->getErrorLog().checkError(false, SPIRVEC_InvalidFunctionCall, + II->getCalledOperand()->getName().str(), "", + __FILE__, __LINE__); + } + break; + } default: break; } diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 629dfd51d0107..7015cac262833 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -1961,6 +1961,7 @@ bool LLVMToSPIRV::isKnownIntrinsic(Intrinsic::ID Id) { case Intrinsic::bitreverse: case Intrinsic::sqrt: case Intrinsic::fabs: + case Intrinsic::abs: case Intrinsic::ceil: case Intrinsic::ctlz: case Intrinsic::cttz: @@ -2075,6 +2076,17 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, return BM->addExtInst(STy, BM->getExtInstSetId(SPIRVEIS_OpenCL), ExtOp, Ops, BB); } + case Intrinsic::abs: { + if (!checkTypeForSPIRVExtendedInstLowering(II, BM)) + break; + // LLVM has only one version of abs and it is only for signed integers.
We + // unconditionally choose SAbs here + SPIRVWord ExtOp = OpenCLLIB::SAbs; + SPIRVType *STy = transType(II->getType()); + std::vector<SPIRVValue *> Ops(1, transValue(II->getArgOperand(0), BB)); + return BM->addExtInst(STy, BM->getExtInstSetId(SPIRVEIS_OpenCL), ExtOp, Ops, + BB); + } case Intrinsic::ceil: { if (!checkTypeForSPIRVExtendedInstLowering(II, BM)) break; diff --git a/llvm-spirv/test/abs.ll b/llvm-spirv/test/abs.ll new file mode 100644 index 0000000000000..a8eaa6e638c6a --- /dev/null +++ b/llvm-spirv/test/abs.ll @@ -0,0 +1,40 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + + +; CHECK: ExtInst {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} s_abs +; CHECK: ExtInst {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} s_abs + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-linux-sycldevice" + +; Function Attrs: norecurse nounwind readnone +define dso_local spir_kernel void @test(i32 %a, <4 x i32> %b) local_unnamed_addr #0 !kernel_arg_buffer_location !5 { +entry: + %0 = tail call i32 @llvm.abs.i32(i32 %a, i1 0) #2 + %1 = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %b, i1 0) #2 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i32 @llvm.abs.i32(i32, i1) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) #1 + +attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!2, !2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 0} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 12.0.0 (https://github.com/c199914007/llvm.git 7f855fa5b04d46494c34a425aa777f8bfc3433b1)"} +!5 = !{i32 -1} From 5cd55b4845e20c9b1c66f6979bb537b0a83eafa1 Mon Sep 17 00:00:00 2001 From: Artem Gindinson Date: Sun, 4 Oct 2020 09:51:55 +0300 Subject: [PATCH 316/321] Fix IVDep translation for accesses to kernel closure fields When IVDep is applied to a regular kernel-scope array/pointer variable, the GEP instructions that access this variable are marked with index group metadata. When IVDep is applied to a captured argument of a kernel, however, the pointer operand of index group-marked instructions becomes a GEP itself. The latter GEP accesses the closure of a kernel lambda with an offset that signifies the number of the captured parameter in the closure layout. To handle the second case: 1. In forward translation, we ensure that for each memory block, all pointer accesses marked into IVDep index groups are recognized and translated into LoopControl parameters. Previously, only the latest pointer access would get listed in the SPIR-V representation of attribute parameters. 2. In backward translation, we also differentiate between GEP instructions that access an array variable directly and GEPs that access closure fields, as sketched below.
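A minimal IR sketch of the two shapes (hypothetical type and value names; the FPGAIVDepLoopAttrOnClosure.ll test added below covers the real patterns):

    ; Case 1 - the marked GEP's pointer operand is the array variable itself:
    %p = getelementptr inbounds [10 x i32], [10 x i32]* %array, i64 0, i64 %i, !llvm.index.group !4

    ; Case 2 - the marked GEP's pointer operand is itself a GEP into the
    ; lambda closure; its last index (i32 2 here) is the captured field's
    ; offset in the closure layout:
    %field = getelementptr inbounds %class.anon, %class.anon* %closure, i64 0, i32 2
    %q = getelementptr inbounds [10 x i32], [10 x i32]* %field, i64 0, i64 %i, !llvm.index.group !5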
Signed-off-by: Artem Gindinson
---
 llvm-spirv/lib/SPIRV/SPIRVReader.cpp          |  61 +-
 llvm-spirv/lib/SPIRV/SPIRVWriter.cpp          |   7 +-
 llvm-spirv/lib/SPIRV/SPIRVWriter.h            |   3 +-
 .../transcoding/FPGAIVDepLoopAttrOnClosure.ll | 601 ++++++++++++++++++
 4 files changed, 653 insertions(+), 19 deletions(-)
 create mode 100644 llvm-spirv/test/transcoding/FPGAIVDepLoopAttrOnClosure.ll

diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
index dbc412b8cd100..090c0d932f6a2 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
@@ -53,6 +53,7 @@
 #include "VectorComputeUtil.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Constants.h"
@@ -803,8 +804,8 @@ void SPIRVToLLVM::setLLVMLoopMetadata(const LoopInstType *LM,
            "Missing loop control parameter!");
   }
   if (LC & LoopControlDependencyArrayINTELMask) {
-    // Collect array variable <-> safelen information
-    std::map<Value *, unsigned> ArraySflnMap;
+    // Collect pointer variable <-> safelen information
+    std::map<Value *, unsigned> PointerSflnMap;
     unsigned NumOperandPairs = LoopControlParameters[NumParam];
     unsigned OperandsEndIndex = NumParam + NumOperandPairs * 2;
     assert(OperandsEndIndex <= LoopControlParameters.size() &&
            "Missing loop control parameter!");
     SPIRVModule *M = LM->getModule();
     while (NumParam < OperandsEndIndex) {
       SPIRVId ArraySPIRVId = LoopControlParameters[++NumParam];
-      Value *ArrayVar = ValueMap[M->getValue(ArraySPIRVId)];
+      Value *PointerVar = ValueMap[M->getValue(ArraySPIRVId)];
       unsigned Safelen = LoopControlParameters[++NumParam];
-      ArraySflnMap.emplace(ArrayVar, Safelen);
+      PointerSflnMap.emplace(PointerVar, Safelen);
     }

     // A single run over the loop to retrieve all GetElementPtr instructions
@@ -826,24 +827,54 @@
         if (!GEP)
           continue;
-        Value *AccessedArray = GEP->getPointerOperand();
-        auto ArraySflnIt = ArraySflnMap.find(AccessedArray);
-        if (ArraySflnIt != ArraySflnMap.end())
-          ArrayGEPMap[AccessedArray].push_back(GEP);
+        Value *AccessedPointer = GEP->getPointerOperand();
+        auto PointerSflnIt = PointerSflnMap.find(AccessedPointer);
+        if (PointerSflnIt != PointerSflnMap.end()) {
+          ArrayGEPMap[AccessedPointer].push_back(GEP);
+        }
       }
     }

-    // Create index group metadata nodes - one per each array
+    // Create index group metadata nodes - one per each of the array
     // variables. Mark each GEP accessing a particular array variable
     // into a corresponding index group
-    std::map<unsigned, SmallVector<MDNode *, 4>> SafelenIdxGroupMap;
+    std::map<unsigned, SmallSet<MDNode *, 4>> SafelenIdxGroupMap;
+    // Whenever a kernel closure field access is pointed to instead of
+    // an array/pointer variable, ensure that all GEPs to that memory
+    // share the same index group by hashing the newly added index groups.
+    // "Memory offset info" represents a handle to the whole closure block
+    // + an integer offset to a particular captured parameter.
+    using MemoryOffsetInfo = std::pair<Value *, unsigned>;
+    std::map<MemoryOffsetInfo, MDNode *> OffsetIdxGroupMap;
+
     for (auto &ArrayGEPIt : ArrayGEPMap) {
-      // Emit a distinct index group that will be referenced from
-      // llvm.loop.parallel_access_indices metadata
-      auto *CurrentDepthIdxGroup = llvm::MDNode::getDistinct(*Context, None);
-      unsigned Safelen = ArraySflnMap.find(ArrayGEPIt.first)->second;
-      SafelenIdxGroupMap[Safelen].push_back(CurrentDepthIdxGroup);
+      MDNode *CurrentDepthIdxGroup = nullptr;
+      if (auto *PrecedingGEP = dyn_cast<GetElementPtrInst>(ArrayGEPIt.first)) {
+        Value *ClosureFieldPointer = PrecedingGEP->getPointerOperand();
+        unsigned Offset =
+            cast<ConstantInt>(PrecedingGEP->getOperand(2))->getZExtValue();
+        MemoryOffsetInfo Info{ClosureFieldPointer, Offset};
+        auto OffsetIdxGroupIt = OffsetIdxGroupMap.find(Info);
+        if (OffsetIdxGroupIt == OffsetIdxGroupMap.end()) {
+          // This is the first GEP encountered for this closure field.
+          // Emit a distinct index group that will be referenced from
+          // llvm.loop.parallel_access_indices metadata; hash the new
+          // MDNode for future accesses to the same memory.
+          CurrentDepthIdxGroup = llvm::MDNode::getDistinct(*Context, None);
+          OffsetIdxGroupMap.emplace(Info, CurrentDepthIdxGroup);
+        } else {
+          // Previous accesses to that field have already been indexed,
+          // just use the already-existing metadata.
+          CurrentDepthIdxGroup = OffsetIdxGroupIt->second;
+        }
+      } else /* Regular kernel-scope array/pointer variable */ {
+        // Emit a distinct index group that will be referenced from
+        // llvm.loop.parallel_access_indices metadata
+        CurrentDepthIdxGroup = llvm::MDNode::getDistinct(*Context, None);
+      }
+      unsigned Safelen = PointerSflnMap.find(ArrayGEPIt.first)->second;
+      SafelenIdxGroupMap[Safelen].insert(CurrentDepthIdxGroup);
       for (auto *GEP : ArrayGEPIt.second) {
         StringRef IdxGroupMDName("llvm.index.group");
         llvm::MDNode *PreviousIdxGroup = GEP->getMetadata(IdxGroupMDName);
diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
index 7015cac262833..4db11b527b7c6 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
@@ -951,7 +951,8 @@ class LLVMParallelAccessIndices {
       // survive the translation. This check should be replaced with an
       // assertion once all known cases are handled.
       if (IdxGroupArrayPairIt != IndexGroupArrayMap.end())
-        ArrayVariablesVec.push_back(IdxGroupArrayPairIt->second);
+        for (SPIRVId ArrayAccessId : IdxGroupArrayPairIt->second)
+          ArrayVariablesVec.push_back(ArrayAccessId);
     }
   }
@@ -1476,13 +1477,13 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V,
      // 1) The metadata node has no operands. It will be directly referenced
      // from within the optimization hint metadata.
      if (NumOperands == 0)
-       IndexGroupArrayMap[IndexGroup] = AccessedArrayId;
+       IndexGroupArrayMap[IndexGroup].insert(AccessedArrayId);
      // 2) The metadata node has several operands. It serves to link an index
      // group specific to some embedded loop with other index groups that
      // mark the same array variable for the outer loop(s).
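      // A hand-written illustration of case 2 (the node shapes mirror the
      // FPGAIVDepLoopAttrOnClosure.ll test added below), assuming two
      // nesting levels:
      //   !outer = distinct !{}        ; index group of the outer loop
      //   !inner = distinct !{}        ; index group of the embedded loop
      //   !gep   = !{!outer, !inner}   ; attached to the GEP via !llvm.index.group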
      for (unsigned I = 0; I < NumOperands; ++I) {
        auto *ContainedIndexGroup = getMDOperandAsMDNode(IndexGroup, I);
-       IndexGroupArrayMap[ContainedIndexGroup] = AccessedArrayId;
+       IndexGroupArrayMap[ContainedIndexGroup].insert(AccessedArrayId);
      }
    }
diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.h b/llvm-spirv/lib/SPIRV/SPIRVWriter.h
index 40f8b991324b6..c8f48a04638e4 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVWriter.h
+++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.h
@@ -53,6 +53,7 @@
 #include "SPIRVType.h"
 #include "SPIRVValue.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -138,7 +139,7 @@ class LLVMToSPIRV : public ModulePass {
   typedef DenseMap<Type *, SPIRVType *> LLVMToSPIRVTypeMap;
   typedef DenseMap<Value *, SPIRVValue *> LLVMToSPIRVValueMap;
-  typedef DenseMap<MDNode *, SPIRVId> LLVMToSPIRVMetadataMap;
+  typedef DenseMap<MDNode *, SmallSet<SPIRVId, 2>> LLVMToSPIRVMetadataMap;

 private:
   Module *M;
diff --git a/llvm-spirv/test/transcoding/FPGAIVDepLoopAttrOnClosure.ll b/llvm-spirv/test/transcoding/FPGAIVDepLoopAttrOnClosure.ll
new file mode 100644
index 0000000000000..439f04bd28f2c
--- /dev/null
+++ b/llvm-spirv/test/transcoding/FPGAIVDepLoopAttrOnClosure.ll
@@ -0,0 +1,601 @@
+; This LLVM IR was generated using Intel SYCL Clang compiler (https://github.com/intel/llvm)
+;
+; SYCL source code of this program can be found below.
+;
+; template <typename Func>
+; __attribute__((sycl_kernel))
+; void kernel(Func f) {
+;   f();
+; }
+;
+; int main() {
+;   int buf1[10], buf2[10];
+;   const int c = 42;
+;
+;   kernel([=]() mutable {
+;     [[intelfpga::ivdep(buf1, 3)]]
+;     for (int i = 0; i < 6; ++i) {
+;       buf1[i] *= (buf2[i + 4] + c);
+;       [[intelfpga::ivdep(2)]]
+;       for (int j = 0; j < 7; ++j)
+;         buf2[i] *= (buf1[i] + buf2[i + 3]);
+;     }
+;   });
+;
+;   kernel([=]() mutable {
+;     [[intelfpga::ivdep(buf1, 3)]]
+;     [[intelfpga::ivdep(buf2, 2)]]
+;     for (int i = 0; i < 6; ++i) {
+;       buf1[i] *= (buf2[i + 4] + c);
+;       buf2[i] *= (buf1[i] + buf2[i + 3]);
+;     }
+;   });
+;
+;   return 0;
+; }
+
+; RUN: llvm-as < %s > %t.bc
+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_fpga_loop_controls -o %t.spv
+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
+
+; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o %t.rev.ll
+
+; CHECK-LLVM is the base prefix, which includes simple checks for
+; "llvm.loop.parallel_access_indices" MD nodes with only 1 index group operand
+; CHECK-LLVM-MD-OP<N> is the group of prefixes to check for more
+; complicated cases of "llvm.loop.parallel_access_indices" nodes, the ones
+; containing multiple index group operands that could come in indeterminate order
+; RUN: FileCheck < %t.rev.ll %s --check-prefixes=CHECK-LLVM,CHECK-LLVM-MD-OP1
+; RUN: FileCheck < %t.rev.ll %s --check-prefixes=CHECK-LLVM,CHECK-LLVM-MD-OP2
+
+; CHECK-SPIRV: 2 Capability FPGALoopControlsINTEL
+; CHECK-SPIRV: 9 Extension "SPV_INTEL_fpga_loop_controls"
+; CHECK-SPIRV-DAG: TypeInt [[TYPE_INT_64:[0-9]+]] 64 0
+; CHECK-SPIRV-DAG: TypeInt [[TYPE_INT_32:[0-9]+]] 32 0
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_64]] [[SIZE:[0-9]+]] 10 0
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_32]] [[OFFSET_CONST_0:[0-9]+]] 0
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_32]] [[OFFSET_CONST_1:[0-9]+]] 1
+; CHECK-SPIRV: TypeArray [[TYPE_ARRAY:[0-9]+]] [[TYPE_INT_32]] [[SIZE]]
+; CHECK-SPIRV: TypeStruct [[TYPE_EMB_CLOSURE_STRUCT:[0-9]+]] [[TYPE_ARRAY]] [[TYPE_ARRAY]]
+; The next type is only used when initializing the memory fields
+; CHECK-SPIRV: TypePointer [[TYPE_CLOSURE_INIT_PTR:[0-9]+]] {{[0-9]+}} [[TYPE_EMB_CLOSURE_STRUCT]]
+; This is the type used in the kernel
function +; CHECK-SPIRV: TypePointer [[TYPE_EMB_CLOSURE_PTR:[0-9]+]] {{[0-9]+}} [[TYPE_EMB_CLOSURE_STRUCT]] +; CHECK-SPIRV: TypeFunction [[TYPE_EMB_FUNC:[0-9]+]] {{[0-9]+}} [[TYPE_EMB_CLOSURE_PTR]] +; CHECK-SPIRV: TypePointer [[TYPE_EMB_CLOSURE_PARAM_PTR:[0-9]+]] {{[0-9]+}} [[TYPE_EMB_CLOSURE_PTR]] +; CHECK-SPIRV: TypePointer [[TYPE_PTR:[0-9]+]] {{[0-9]+}} [[TYPE_ARRAY]] +; CHECK-SPIRV: TypeStruct [[TYPE_SFLN_CLOSURE_STRUCT:[0-9]+]] [[TYPE_ARRAY]] [[TYPE_ARRAY]] +; The next type is only used when initializing the memory fields +; CHECK-SPIRV: TypePointer [[TYPE_CLOSURE_INIT_PTR:[0-9]+]] {{[0-9]+}} [[TYPE_SFLN_CLOSURE_STRUCT]] +; This is the type used in the kernel function +; CHECK-SPIRV: TypePointer [[TYPE_SFLN_CLOSURE_PTR:[0-9]+]] {{[0-9]+}} [[TYPE_SFLN_CLOSURE_STRUCT]] +; CHECK-SPIRV: TypeFunction [[TYPE_SFLN_FUNC:[0-9]+]] {{[0-9]+}} [[TYPE_SFLN_CLOSURE_PTR]] +; CHECK-SPIRV: TypePointer [[TYPE_SFLN_CLOSURE_PARAM_PTR:[0-9]+]] {{[0-9]+}} [[TYPE_SFLN_CLOSURE_PTR]] + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +; CHECK-LLVM: %[[CLOSURE_NAME_EMB:"class.*"]] = type { [10 x i32], [10 x i32] } +; CHECK-LLVM: %[[CLOSURE_NAME_SFLN:"class.*"]] = type { [10 x i32], [10 x i32] } +%"class._ZTSZ4mainE3$_0.anon" = type { [10 x i32], [10 x i32] } +%"class._ZTSZ4mainE3$_0.anon.0" = type { [10 x i32], [10 x i32] } + +; Function Attrs: convergent norecurse +define dso_local spir_kernel void @_ZTSZ4mainE16EmbeddedLoopTest(i32 %_arg_, i32 %_arg_1, i32 %_arg_3, i32 %_arg_5, i32 %_arg_7, i32 %_arg_9, i32 %_arg_11, i32 %_arg_13, i32 %_arg_15, i32 %_arg_17, i32 %_arg_19, i32 %_arg_21, i32 %_arg_23, i32 %_arg_25, i32 %_arg_27, i32 %_arg_29, i32 %_arg_31, i32 %_arg_33, i32 %_arg_35, i32 %_arg_37) #0 !kernel_arg_buffer_location !4 { +entry: + %_arg_.addr = alloca i32, align 4 + %_arg_.addr2 = alloca i32, align 4 + %_arg_.addr4 = alloca i32, align 4 + %_arg_.addr6 = alloca i32, align 4 + %_arg_.addr8 = alloca i32, align 4 + %_arg_.addr10 = alloca i32, align 4 + %_arg_.addr12 = alloca i32, align 4 + %_arg_.addr14 = alloca i32, align 4 + %_arg_.addr16 = alloca i32, align 4 + %_arg_.addr18 = alloca i32, align 4 + %_arg_.addr20 = alloca i32, align 4 + %_arg_.addr22 = alloca i32, align 4 + %_arg_.addr24 = alloca i32, align 4 + %_arg_.addr26 = alloca i32, align 4 + %_arg_.addr28 = alloca i32, align 4 + %_arg_.addr30 = alloca i32, align 4 + %_arg_.addr32 = alloca i32, align 4 + %_arg_.addr34 = alloca i32, align 4 + %_arg_.addr36 = alloca i32, align 4 + %_arg_.addr38 = alloca i32, align 4 + %0 = alloca %"class._ZTSZ4mainE3$_0.anon", align 4 + store i32 %_arg_, i32* %_arg_.addr, align 4, !tbaa !5 + store i32 %_arg_1, i32* %_arg_.addr2, align 4, !tbaa !5 + store i32 %_arg_3, i32* %_arg_.addr4, align 4, !tbaa !5 + store i32 %_arg_5, i32* %_arg_.addr6, align 4, !tbaa !5 + store i32 %_arg_7, i32* %_arg_.addr8, align 4, !tbaa !5 + store i32 %_arg_9, i32* %_arg_.addr10, align 4, !tbaa !5 + store i32 %_arg_11, i32* %_arg_.addr12, align 4, !tbaa !5 + store i32 %_arg_13, i32* %_arg_.addr14, align 4, !tbaa !5 + store i32 %_arg_15, i32* %_arg_.addr16, align 4, !tbaa !5 + store i32 %_arg_17, i32* %_arg_.addr18, align 4, !tbaa !5 + store i32 %_arg_19, i32* %_arg_.addr20, align 4, !tbaa !5 + store i32 %_arg_21, i32* %_arg_.addr22, align 4, !tbaa !5 + store i32 %_arg_23, i32* %_arg_.addr24, align 4, !tbaa !5 + store i32 %_arg_25, i32* %_arg_.addr26, align 4, !tbaa !5 + store i32 %_arg_27, i32* %_arg_.addr28, 
align 4, !tbaa !5 + store i32 %_arg_29, i32* %_arg_.addr30, align 4, !tbaa !5 + store i32 %_arg_31, i32* %_arg_.addr32, align 4, !tbaa !5 + store i32 %_arg_33, i32* %_arg_.addr34, align 4, !tbaa !5 + store i32 %_arg_35, i32* %_arg_.addr36, align 4, !tbaa !5 + store i32 %_arg_37, i32* %_arg_.addr38, align 4, !tbaa !5 + %1 = bitcast %"class._ZTSZ4mainE3$_0.anon"* %0 to i8* + call void @llvm.lifetime.start.p0i8(i64 80, i8* %1) #3 + %2 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon", %"class._ZTSZ4mainE3$_0.anon"* %0, i32 0, i32 0 + %arrayinit.begin = getelementptr inbounds [10 x i32], [10 x i32]* %2, i64 0, i64 0 + %3 = load i32, i32* %_arg_.addr, align 4, !tbaa !5 + store i32 %3, i32* %arrayinit.begin, align 4, !tbaa !5 + %arrayinit.element = getelementptr inbounds i32, i32* %arrayinit.begin, i64 1 + %4 = load i32, i32* %_arg_.addr2, align 4, !tbaa !5 + store i32 %4, i32* %arrayinit.element, align 4, !tbaa !5 + %arrayinit.element39 = getelementptr inbounds i32, i32* %arrayinit.element, i64 1 + %5 = load i32, i32* %_arg_.addr4, align 4, !tbaa !5 + store i32 %5, i32* %arrayinit.element39, align 4, !tbaa !5 + %arrayinit.element40 = getelementptr inbounds i32, i32* %arrayinit.element39, i64 1 + %6 = load i32, i32* %_arg_.addr6, align 4, !tbaa !5 + store i32 %6, i32* %arrayinit.element40, align 4, !tbaa !5 + %arrayinit.element41 = getelementptr inbounds i32, i32* %arrayinit.element40, i64 1 + %7 = load i32, i32* %_arg_.addr8, align 4, !tbaa !5 + store i32 %7, i32* %arrayinit.element41, align 4, !tbaa !5 + %arrayinit.element42 = getelementptr inbounds i32, i32* %arrayinit.element41, i64 1 + %8 = load i32, i32* %_arg_.addr10, align 4, !tbaa !5 + store i32 %8, i32* %arrayinit.element42, align 4, !tbaa !5 + %arrayinit.element43 = getelementptr inbounds i32, i32* %arrayinit.element42, i64 1 + %9 = load i32, i32* %_arg_.addr12, align 4, !tbaa !5 + store i32 %9, i32* %arrayinit.element43, align 4, !tbaa !5 + %arrayinit.element44 = getelementptr inbounds i32, i32* %arrayinit.element43, i64 1 + %10 = load i32, i32* %_arg_.addr14, align 4, !tbaa !5 + store i32 %10, i32* %arrayinit.element44, align 4, !tbaa !5 + %arrayinit.element45 = getelementptr inbounds i32, i32* %arrayinit.element44, i64 1 + %11 = load i32, i32* %_arg_.addr16, align 4, !tbaa !5 + store i32 %11, i32* %arrayinit.element45, align 4, !tbaa !5 + %arrayinit.element46 = getelementptr inbounds i32, i32* %arrayinit.element45, i64 1 + %12 = load i32, i32* %_arg_.addr18, align 4, !tbaa !5 + store i32 %12, i32* %arrayinit.element46, align 4, !tbaa !5 + %13 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon", %"class._ZTSZ4mainE3$_0.anon"* %0, i32 0, i32 1 + %arrayinit.begin47 = getelementptr inbounds [10 x i32], [10 x i32]* %13, i64 0, i64 0 + %14 = load i32, i32* %_arg_.addr20, align 4, !tbaa !5 + store i32 %14, i32* %arrayinit.begin47, align 4, !tbaa !5 + %arrayinit.element48 = getelementptr inbounds i32, i32* %arrayinit.begin47, i64 1 + %15 = load i32, i32* %_arg_.addr22, align 4, !tbaa !5 + store i32 %15, i32* %arrayinit.element48, align 4, !tbaa !5 + %arrayinit.element49 = getelementptr inbounds i32, i32* %arrayinit.element48, i64 1 + %16 = load i32, i32* %_arg_.addr24, align 4, !tbaa !5 + store i32 %16, i32* %arrayinit.element49, align 4, !tbaa !5 + %arrayinit.element50 = getelementptr inbounds i32, i32* %arrayinit.element49, i64 1 + %17 = load i32, i32* %_arg_.addr26, align 4, !tbaa !5 + store i32 %17, i32* %arrayinit.element50, align 4, !tbaa !5 + %arrayinit.element51 = getelementptr inbounds i32, i32* %arrayinit.element50, i64 1 
+ %18 = load i32, i32* %_arg_.addr28, align 4, !tbaa !5 + store i32 %18, i32* %arrayinit.element51, align 4, !tbaa !5 + %arrayinit.element52 = getelementptr inbounds i32, i32* %arrayinit.element51, i64 1 + %19 = load i32, i32* %_arg_.addr30, align 4, !tbaa !5 + store i32 %19, i32* %arrayinit.element52, align 4, !tbaa !5 + %arrayinit.element53 = getelementptr inbounds i32, i32* %arrayinit.element52, i64 1 + %20 = load i32, i32* %_arg_.addr32, align 4, !tbaa !5 + store i32 %20, i32* %arrayinit.element53, align 4, !tbaa !5 + %arrayinit.element54 = getelementptr inbounds i32, i32* %arrayinit.element53, i64 1 + %21 = load i32, i32* %_arg_.addr34, align 4, !tbaa !5 + store i32 %21, i32* %arrayinit.element54, align 4, !tbaa !5 + %arrayinit.element55 = getelementptr inbounds i32, i32* %arrayinit.element54, i64 1 + %22 = load i32, i32* %_arg_.addr36, align 4, !tbaa !5 + store i32 %22, i32* %arrayinit.element55, align 4, !tbaa !5 + %arrayinit.element56 = getelementptr inbounds i32, i32* %arrayinit.element55, i64 1 + %23 = load i32, i32* %_arg_.addr38, align 4, !tbaa !5 + store i32 %23, i32* %arrayinit.element56, align 4, !tbaa !5 + %24 = addrspacecast %"class._ZTSZ4mainE3$_0.anon"* %0 to %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* + call spir_func void @"_ZZ4mainEN3$_0clEv"(%"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %24) #4 + %25 = bitcast %"class._ZTSZ4mainE3$_0.anon"* %0 to i8* + call void @llvm.lifetime.end.p0i8(i64 80, i8* %25) #3 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; CHECK-SPIRV: Function {{.*}} [[TYPE_EMB_FUNC]] +; CHECK-LLVM: define internal spir_func void {{.*}}(%[[CLOSURE_NAME_EMB]] addrspace(4)* %this) +; Function Attrs: convergent inlinehint norecurse nounwind +define internal spir_func void @"_ZZ4mainEN3$_0clEv"(%"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this) #2 align 2 { +entry: + ; CHECK-SPIRV: Variable [[TYPE_EMB_CLOSURE_PARAM_PTR]] [[THIS_EMB_ID:[0-9]+]] + ; CHECK-LLVM: %this.addr = alloca %[[CLOSURE_NAME_EMB]] + %this.addr = alloca %"class._ZTSZ4mainE3$_0.anon" addrspace(4)*, align 8 + %i = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + %j = alloca i32, align 4 + ; CHECK-SPIRV: Load [[TYPE_EMB_CLOSURE_PTR]] [[THIS_EMB_LOAD:[0-9]+]] [[THIS_EMB_ID]] + ; CHECK-LLVM: %this1 = load %[[CLOSURE_NAME_EMB]] addrspace(4)*, %[[CLOSURE_NAME_EMB]] addrspace(4)** %this.addr + store %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this, %"class._ZTSZ4mainE3$_0.anon" addrspace(4)** %this.addr, align 8, !tbaa !9 + %this1 = load %"class._ZTSZ4mainE3$_0.anon" addrspace(4)*, %"class._ZTSZ4mainE3$_0.anon" addrspace(4)** %this.addr, align 8 + %0 = bitcast i32* %i to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #3 + store i32 0, i32* %i, align 4, !tbaa !5 + br label %for.cond + +for.cond: ; preds = %for.inc18, %entry + %1 = load i32, i32* %i, align 4, !tbaa !5 + %cmp = icmp slt i32 %1, 6 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + store i32 2, i32* %cleanup.dest.slot, align 4 + %2 = bitcast i32* %i to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %2) #3 + ; Per SPIR-V spec extension INTEL/SPV_INTEL_fpga_loop_controls, + ; DependencyArrayINTEL = 0x40000 = 262144 + ; CHECK-SPIRV: LoopMerge [[MERGE_BLOCK:[0-9]+]] {{[0-9]+}} 262144 2 [[BUF1_EMB_OUTER_ID:[0-9]+]] 3 [[BUF1_EMB_INNER_ID:[0-9]+]] 3 + ; CHECK-SPIRV-NEXT: BranchConditional {{[0-9]+}} {{[0-9]+}} [[MERGE_BLOCK]] + br label %for.end20 + +for.body: ; preds = 
%for.cond + ; CHECK-LLVM: %[[BUF2_EMB_OUTER_CLOSURE_ACCESS:[0-9]+]] = getelementptr inbounds %[[CLOSURE_NAME_EMB]], %[[CLOSURE_NAME_EMB]] addrspace(4)* %this1, i32 0, i32 1 + %3 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon", %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this1, i32 0, i32 1 + %4 = load i32, i32* %i, align 4, !tbaa !5 + %add = add nsw i32 %4, 4 + %idxprom = sext i32 %add to i64 + ; CHECK-LLVM-NOT: getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %[[BUF2_EMB_OUTER_CLOSURE_ACCESS]]{{.*}}, !llvm.index.group + %arrayidx = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %3, i64 0, i64 %idxprom + %5 = load i32, i32 addrspace(4)* %arrayidx, align 4, !tbaa !5 + %add2 = add nsw i32 %5, 42 + ; CHECK-SPIRV: InBoundsPtrAccessChain [[TYPE_PTR]] [[BUF1_EMB_OUTER_ID]] [[THIS_EMB_LOAD]] [[OFFSET_CONST_0]] [[OFFSET_CONST_0]] + ; CHECK-LLVM: %[[BUF1_EMB_OUTER_CLOSURE_ACCESS:[0-9]+]] = getelementptr inbounds %[[CLOSURE_NAME_EMB]], %[[CLOSURE_NAME_EMB]] addrspace(4)* %this1, i32 0, i32 0 + %6 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon", %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this1, i32 0, i32 0 + %7 = load i32, i32* %i, align 4, !tbaa !5 + %idxprom3 = sext i32 %7 to i64 + ; CHECK-LLVM: getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %[[BUF1_EMB_OUTER_CLOSURE_ACCESS]]{{.*}}, !llvm.index.group ![[BUF1_EMB_OUTER_IDX_GR:[0-9]+]] + %arrayidx4 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %6, i64 0, i64 %idxprom3, !llvm.index.group !11 + %8 = load i32, i32 addrspace(4)* %arrayidx4, align 4, !tbaa !5 + %mul = mul nsw i32 %8, %add2 + store i32 %mul, i32 addrspace(4)* %arrayidx4, align 4, !tbaa !5 + %9 = bitcast i32* %j to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %9) #3 + store i32 0, i32* %j, align 4, !tbaa !5 + br label %for.cond5 + +for.cond5: ; preds = %for.inc, %for.body + %10 = load i32, i32* %j, align 4, !tbaa !5 + %cmp6 = icmp slt i32 %10, 7 + ; Per SPIR-V spec extension INTEL/SPV_INTEL_fpga_loop_controls, + ; DependencyArrayINTEL & LoopControlDependencyLengthMask = 0x40000 & 0x00000008 = 262152 + ; CHECK-SPIRV: LoopMerge [[MERGE_BLOCK:[0-9]+]] {{[0-9]+}} 262152 2 3 [[BUF1_EMB_INNER_ID]] 2 [[BUF2_EMB_INNER_PRE_ADD_ID:[0-9]+]] 2 [[BUF2_EMB_INNER_PRE_MUL_ID:[0-9]+]] 2 + ; CHECK-SPIRV-NEXT: BranchConditional {{[0-9]+}} {{[0-9]+}} [[MERGE_BLOCK]] + br i1 %cmp6, label %for.body8, label %for.cond.cleanup7 + +for.cond.cleanup7: ; preds = %for.cond5 + store i32 5, i32* %cleanup.dest.slot, align 4 + %11 = bitcast i32* %j to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %11) #3 + br label %for.end + +for.body8: ; preds = %for.cond5 + ; CHECK-SPIRV: InBoundsPtrAccessChain [[TYPE_PTR]] [[BUF1_EMB_INNER_ID]] [[THIS_EMB_LOAD]] [[OFFSET_CONST_0]] [[OFFSET_CONST_0]] + ; CHECK-LLVM: %[[BUF1_EMB_INNER_CLOSURE_ACCESS:[0-9]+]] = getelementptr inbounds %[[CLOSURE_NAME_EMB]], %[[CLOSURE_NAME_EMB]] addrspace(4)* %this1, i32 0, i32 0 + %12 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon", %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this1, i32 0, i32 0 + %13 = load i32, i32* %i, align 4, !tbaa !5 + %idxprom9 = sext i32 %13 to i64 + ; CHECK-LLVM: getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %[[BUF1_EMB_INNER_CLOSURE_ACCESS]]{{.*}}, !llvm.index.group ![[BUF1_EMB_INNER_IDX_GR:[0-9]+]] + %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %12, i64 0, i64 %idxprom9, !llvm.index.group !12 + %14 = load i32, i32 addrspace(4)* %arrayidx10, align 4, !tbaa !5 + ; CHECK-SPIRV: 
InBoundsPtrAccessChain [[TYPE_PTR]] [[BUF2_EMB_INNER_PRE_ADD_ID]] [[THIS_EMB_LOAD]] [[OFFSET_CONST_0]] [[OFFSET_CONST_1]] + ; CHECK-LLVM: %[[BUF2_EMB_INNER_CLOSURE_ACCESS:[0-9]+]] = getelementptr inbounds %[[CLOSURE_NAME_EMB]], %[[CLOSURE_NAME_EMB]] addrspace(4)* %this1, i32 0, i32 1 + %15 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon", %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this1, i32 0, i32 1 + %16 = load i32, i32* %i, align 4, !tbaa !5 + %add11 = add nsw i32 %16, 3 + %idxprom12 = sext i32 %add11 to i64 + ; CHECK-LLVM: getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %[[BUF2_EMB_INNER_CLOSURE_ACCESS]]{{.*}}, !llvm.index.group ![[BUF2_EMB_INNER_IDX_GR:[0-9]+]] + %arrayidx13 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %15, i64 0, i64 %idxprom12, !llvm.index.group !14 + %17 = load i32, i32 addrspace(4)* %arrayidx13, align 4, !tbaa !5 + %add14 = add nsw i32 %14, %17 + ; CHECK-SPIRV: InBoundsPtrAccessChain [[TYPE_PTR]] [[BUF2_EMB_INNER_PRE_MUL_ID]] [[THIS_EMB_LOAD]] [[OFFSET_CONST_0]] [[OFFSET_CONST_1]] + ; CHECK-LLVM: %[[BUF2_EMB_INNER_CLOSURE_ACCESS:[0-9]+]] = getelementptr inbounds %[[CLOSURE_NAME_EMB]], %[[CLOSURE_NAME_EMB]] addrspace(4)* %this1, i32 0, i32 1 + %18 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon", %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this1, i32 0, i32 1 + %19 = load i32, i32* %i, align 4, !tbaa !5 + %idxprom15 = sext i32 %19 to i64 + ; CHECK-LLVM: getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %[[BUF2_EMB_INNER_CLOSURE_ACCESS]]{{.*}}, !llvm.index.group ![[BUF2_EMB_INNER_IDX_GR]] + %arrayidx16 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %18, i64 0, i64 %idxprom15, !llvm.index.group !14 + %20 = load i32, i32 addrspace(4)* %arrayidx16, align 4, !tbaa !5 + %mul17 = mul nsw i32 %20, %add14 + store i32 %mul17, i32 addrspace(4)* %arrayidx16, align 4, !tbaa !5 + br label %for.inc + +for.inc: ; preds = %for.body8 + %21 = load i32, i32* %j, align 4, !tbaa !5 + %inc = add nsw i32 %21, 1 + store i32 %inc, i32* %j, align 4, !tbaa !5 + ; CHECK-LLVM: br label %{{.*}}, !llvm.loop ![[EMB_INNER_MD_LOOP:[0-9]+]] + br label %for.cond5, !llvm.loop !15 + +for.end: ; preds = %for.cond.cleanup7 + br label %for.inc18 + +for.inc18: ; preds = %for.end + %22 = load i32, i32* %i, align 4, !tbaa !5 + %inc19 = add nsw i32 %22, 1 + store i32 %inc19, i32* %i, align 4, !tbaa !5 + ; CHECK-LLVM: br label %{{.*}}, !llvm.loop ![[EMB_OUTER_MD_LOOP:[0-9]+]] + br label %for.cond, !llvm.loop !18 + +for.end20: ; preds = %for.cond.cleanup + ret void +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: convergent norecurse +define dso_local spir_kernel void @_ZTSZ4mainE18VaryingSafelenTest(i32 %_arg_, i32 %_arg_1, i32 %_arg_3, i32 %_arg_5, i32 %_arg_7, i32 %_arg_9, i32 %_arg_11, i32 %_arg_13, i32 %_arg_15, i32 %_arg_17, i32 %_arg_19, i32 %_arg_21, i32 %_arg_23, i32 %_arg_25, i32 %_arg_27, i32 %_arg_29, i32 %_arg_31, i32 %_arg_33, i32 %_arg_35, i32 %_arg_37) #0 !kernel_arg_buffer_location !4 { +entry: + %_arg_.addr = alloca i32, align 4 + %_arg_.addr2 = alloca i32, align 4 + %_arg_.addr4 = alloca i32, align 4 + %_arg_.addr6 = alloca i32, align 4 + %_arg_.addr8 = alloca i32, align 4 + %_arg_.addr10 = alloca i32, align 4 + %_arg_.addr12 = alloca i32, align 4 + %_arg_.addr14 = alloca i32, align 4 + %_arg_.addr16 = alloca i32, align 4 + %_arg_.addr18 = alloca i32, align 4 + %_arg_.addr20 = alloca i32, align 4 + %_arg_.addr22 = alloca i32, align 
4 + %_arg_.addr24 = alloca i32, align 4 + %_arg_.addr26 = alloca i32, align 4 + %_arg_.addr28 = alloca i32, align 4 + %_arg_.addr30 = alloca i32, align 4 + %_arg_.addr32 = alloca i32, align 4 + %_arg_.addr34 = alloca i32, align 4 + %_arg_.addr36 = alloca i32, align 4 + %_arg_.addr38 = alloca i32, align 4 + %0 = alloca %"class._ZTSZ4mainE3$_0.anon.0", align 4 + store i32 %_arg_, i32* %_arg_.addr, align 4, !tbaa !5 + store i32 %_arg_1, i32* %_arg_.addr2, align 4, !tbaa !5 + store i32 %_arg_3, i32* %_arg_.addr4, align 4, !tbaa !5 + store i32 %_arg_5, i32* %_arg_.addr6, align 4, !tbaa !5 + store i32 %_arg_7, i32* %_arg_.addr8, align 4, !tbaa !5 + store i32 %_arg_9, i32* %_arg_.addr10, align 4, !tbaa !5 + store i32 %_arg_11, i32* %_arg_.addr12, align 4, !tbaa !5 + store i32 %_arg_13, i32* %_arg_.addr14, align 4, !tbaa !5 + store i32 %_arg_15, i32* %_arg_.addr16, align 4, !tbaa !5 + store i32 %_arg_17, i32* %_arg_.addr18, align 4, !tbaa !5 + store i32 %_arg_19, i32* %_arg_.addr20, align 4, !tbaa !5 + store i32 %_arg_21, i32* %_arg_.addr22, align 4, !tbaa !5 + store i32 %_arg_23, i32* %_arg_.addr24, align 4, !tbaa !5 + store i32 %_arg_25, i32* %_arg_.addr26, align 4, !tbaa !5 + store i32 %_arg_27, i32* %_arg_.addr28, align 4, !tbaa !5 + store i32 %_arg_29, i32* %_arg_.addr30, align 4, !tbaa !5 + store i32 %_arg_31, i32* %_arg_.addr32, align 4, !tbaa !5 + store i32 %_arg_33, i32* %_arg_.addr34, align 4, !tbaa !5 + store i32 %_arg_35, i32* %_arg_.addr36, align 4, !tbaa !5 + store i32 %_arg_37, i32* %_arg_.addr38, align 4, !tbaa !5 + %1 = bitcast %"class._ZTSZ4mainE3$_0.anon.0"* %0 to i8* + call void @llvm.lifetime.start.p0i8(i64 80, i8* %1) #3 + %2 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon.0", %"class._ZTSZ4mainE3$_0.anon.0"* %0, i32 0, i32 0 + %arrayinit.begin = getelementptr inbounds [10 x i32], [10 x i32]* %2, i64 0, i64 0 + %3 = load i32, i32* %_arg_.addr, align 4, !tbaa !5 + store i32 %3, i32* %arrayinit.begin, align 4, !tbaa !5 + %arrayinit.element = getelementptr inbounds i32, i32* %arrayinit.begin, i64 1 + %4 = load i32, i32* %_arg_.addr2, align 4, !tbaa !5 + store i32 %4, i32* %arrayinit.element, align 4, !tbaa !5 + %arrayinit.element39 = getelementptr inbounds i32, i32* %arrayinit.element, i64 1 + %5 = load i32, i32* %_arg_.addr4, align 4, !tbaa !5 + store i32 %5, i32* %arrayinit.element39, align 4, !tbaa !5 + %arrayinit.element40 = getelementptr inbounds i32, i32* %arrayinit.element39, i64 1 + %6 = load i32, i32* %_arg_.addr6, align 4, !tbaa !5 + store i32 %6, i32* %arrayinit.element40, align 4, !tbaa !5 + %arrayinit.element41 = getelementptr inbounds i32, i32* %arrayinit.element40, i64 1 + %7 = load i32, i32* %_arg_.addr8, align 4, !tbaa !5 + store i32 %7, i32* %arrayinit.element41, align 4, !tbaa !5 + %arrayinit.element42 = getelementptr inbounds i32, i32* %arrayinit.element41, i64 1 + %8 = load i32, i32* %_arg_.addr10, align 4, !tbaa !5 + store i32 %8, i32* %arrayinit.element42, align 4, !tbaa !5 + %arrayinit.element43 = getelementptr inbounds i32, i32* %arrayinit.element42, i64 1 + %9 = load i32, i32* %_arg_.addr12, align 4, !tbaa !5 + store i32 %9, i32* %arrayinit.element43, align 4, !tbaa !5 + %arrayinit.element44 = getelementptr inbounds i32, i32* %arrayinit.element43, i64 1 + %10 = load i32, i32* %_arg_.addr14, align 4, !tbaa !5 + store i32 %10, i32* %arrayinit.element44, align 4, !tbaa !5 + %arrayinit.element45 = getelementptr inbounds i32, i32* %arrayinit.element44, i64 1 + %11 = load i32, i32* %_arg_.addr16, align 4, !tbaa !5 + store i32 %11, i32* 
%arrayinit.element45, align 4, !tbaa !5 + %arrayinit.element46 = getelementptr inbounds i32, i32* %arrayinit.element45, i64 1 + %12 = load i32, i32* %_arg_.addr18, align 4, !tbaa !5 + store i32 %12, i32* %arrayinit.element46, align 4, !tbaa !5 + %13 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon.0", %"class._ZTSZ4mainE3$_0.anon.0"* %0, i32 0, i32 1 + %arrayinit.begin47 = getelementptr inbounds [10 x i32], [10 x i32]* %13, i64 0, i64 0 + %14 = load i32, i32* %_arg_.addr20, align 4, !tbaa !5 + store i32 %14, i32* %arrayinit.begin47, align 4, !tbaa !5 + %arrayinit.element48 = getelementptr inbounds i32, i32* %arrayinit.begin47, i64 1 + %15 = load i32, i32* %_arg_.addr22, align 4, !tbaa !5 + store i32 %15, i32* %arrayinit.element48, align 4, !tbaa !5 + %arrayinit.element49 = getelementptr inbounds i32, i32* %arrayinit.element48, i64 1 + %16 = load i32, i32* %_arg_.addr24, align 4, !tbaa !5 + store i32 %16, i32* %arrayinit.element49, align 4, !tbaa !5 + %arrayinit.element50 = getelementptr inbounds i32, i32* %arrayinit.element49, i64 1 + %17 = load i32, i32* %_arg_.addr26, align 4, !tbaa !5 + store i32 %17, i32* %arrayinit.element50, align 4, !tbaa !5 + %arrayinit.element51 = getelementptr inbounds i32, i32* %arrayinit.element50, i64 1 + %18 = load i32, i32* %_arg_.addr28, align 4, !tbaa !5 + store i32 %18, i32* %arrayinit.element51, align 4, !tbaa !5 + %arrayinit.element52 = getelementptr inbounds i32, i32* %arrayinit.element51, i64 1 + %19 = load i32, i32* %_arg_.addr30, align 4, !tbaa !5 + store i32 %19, i32* %arrayinit.element52, align 4, !tbaa !5 + %arrayinit.element53 = getelementptr inbounds i32, i32* %arrayinit.element52, i64 1 + %20 = load i32, i32* %_arg_.addr32, align 4, !tbaa !5 + store i32 %20, i32* %arrayinit.element53, align 4, !tbaa !5 + %arrayinit.element54 = getelementptr inbounds i32, i32* %arrayinit.element53, i64 1 + %21 = load i32, i32* %_arg_.addr34, align 4, !tbaa !5 + store i32 %21, i32* %arrayinit.element54, align 4, !tbaa !5 + %arrayinit.element55 = getelementptr inbounds i32, i32* %arrayinit.element54, i64 1 + %22 = load i32, i32* %_arg_.addr36, align 4, !tbaa !5 + store i32 %22, i32* %arrayinit.element55, align 4, !tbaa !5 + %arrayinit.element56 = getelementptr inbounds i32, i32* %arrayinit.element55, i64 1 + %23 = load i32, i32* %_arg_.addr38, align 4, !tbaa !5 + store i32 %23, i32* %arrayinit.element56, align 4, !tbaa !5 + %24 = addrspacecast %"class._ZTSZ4mainE3$_0.anon.0"* %0 to %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)* + call spir_func void @"_ZZ4mainEN3$_1clEv"(%"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)* %24) #4 + %25 = bitcast %"class._ZTSZ4mainE3$_0.anon.0"* %0 to i8* + call void @llvm.lifetime.end.p0i8(i64 80, i8* %25) #3 + ret void +} + +; CHECK-SPIRV: Function {{.*}} [[TYPE_SFLN_FUNC]] +; CHECK-LLVM: define internal spir_func void {{.*}}(%[[CLOSURE_NAME_SFLN]] addrspace(4)* %this) +; Function Attrs: convergent inlinehint norecurse nounwind +define internal spir_func void @"_ZZ4mainEN3$_1clEv"(%"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)* %this) #2 align 2 { +entry: + ; CHECK-SPIRV: Variable [[TYPE_SFLN_CLOSURE_PARAM_PTR]] [[THIS_SFLN_ID:[0-9]+]] + ; CHECK-LLVM: %this.addr = alloca %[[CLOSURE_NAME_SFLN]] + %this.addr = alloca %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)*, align 8 + %i = alloca i32, align 4 + store %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)* %this, %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)** %this.addr, align 8, !tbaa !9 + ; CHECK-SPIRV: Load [[TYPE_SFLN_CLOSURE_PTR]] [[THIS_SFLN_LOAD:[0-9]+]] [[THIS_SFLN_ID]] + ; 
CHECK-LLVM: %this1 = load %[[CLOSURE_NAME_SFLN]] addrspace(4)*, %[[CLOSURE_NAME_SFLN]] addrspace(4)** %this.addr + %this1 = load %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)*, %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)** %this.addr, align 8 + %0 = bitcast i32* %i to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #3 + store i32 0, i32* %i, align 4, !tbaa !5 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %1 = load i32, i32* %i, align 4, !tbaa !5 + %cmp = icmp slt i32 %1, 6 + ; Per SPIR-V spec extension INTEL/SPV_INTEL_fpga_loop_controls, + ; DependencyArrayINTEL = 0x40000 = 262144 + ; CHECK-SPIRV: LoopMerge [[MERGE_BLOCK:[0-9]+]] {{[0-9]+}} 262144 5 [[BUF1_SFLN_PRE_MUL_ID:[0-9]+]] 3 [[BUF1_SFLN_PRE_STORE_ID:[0-9]+]] 3 [[BUF2_SFLN_PRE_ADD_1_ID:[0-9]+]] 2 [[BUF2_SFLN_PRE_ADD_2_ID:[0-9]+]] 2 [[BUF2_SFLN_PRE_STORE_ID:[0-9]+]] 2 + ; CHECK-SPIRV-NEXT: BranchConditional {{[0-9]+}} {{[0-9]+}} [[MERGE_BLOCK]] + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %2 = bitcast i32* %i to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %2) #3 + br label %for.end + +for.body: ; preds = %for.cond + ; CHECK-SPIRV: InBoundsPtrAccessChain [[TYPE_PTR]] [[BUF2_SFLN_PRE_ADD_1_ID]] [[THIS_SFLN_LOAD]] [[OFFSET_CONST_0]] [[OFFSET_CONST_1]] + ; CHECK-LLVM: %[[BUF2_SFLN_CLOSURE_ACCESS:[0-9]+]] = getelementptr inbounds %[[CLOSURE_NAME_SFLN]], %[[CLOSURE_NAME_SFLN]] addrspace(4)* %this1, i32 0, i32 1 + %3 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon.0", %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)* %this1, i32 0, i32 1 + %4 = load i32, i32* %i, align 4, !tbaa !5 + %add = add nsw i32 %4, 4 + %idxprom = sext i32 %add to i64 + ; CHECK-LLVM: getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %[[BUF2_SFLN_CLOSURE_ACCESS]]{{.*}}, !llvm.index.group ![[BUF2_SFLN_INDEX_GROUP:[0-9]+]] + %arrayidx = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %3, i64 0, i64 %idxprom, !llvm.index.group !20 + %5 = load i32, i32 addrspace(4)* %arrayidx, align 4, !tbaa !5 + %add2 = add nsw i32 %5, 42 + ; CHECK-SPIRV: InBoundsPtrAccessChain [[TYPE_PTR]] [[BUF1_SFLN_PRE_MUL_ID]] [[THIS_SFLN_LOAD]] [[OFFSET_CONST_0]] [[OFFSET_CONST_0]] + ; CHECK-LLVM: %[[BUF1_SFLN_CLOSURE_ACCESS:[0-9]+]] = getelementptr inbounds %[[CLOSURE_NAME_SFLN]], %[[CLOSURE_NAME_SFLN]] addrspace(4)* %this1, i32 0, i32 0 + %6 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon.0", %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)* %this1, i32 0, i32 0 + %7 = load i32, i32* %i, align 4, !tbaa !5 + %idxprom3 = sext i32 %7 to i64 + ; CHECK-LLVM: getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %[[BUF1_SFLN_CLOSURE_ACCESS]]{{.*}}, !llvm.index.group ![[BUF1_SFLN_INDEX_GROUP:[0-9]+]] + %arrayidx4 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %6, i64 0, i64 %idxprom3, !llvm.index.group !21 + %8 = load i32, i32 addrspace(4)* %arrayidx4, align 4, !tbaa !5 + %mul = mul nsw i32 %8, %add2 + store i32 %mul, i32 addrspace(4)* %arrayidx4, align 4, !tbaa !5 + ; CHECK-SPIRV: InBoundsPtrAccessChain [[TYPE_PTR]] [[BUF1_SFLN_PRE_STORE_ID]] [[THIS_SFLN_LOAD]] [[OFFSET_CONST_0]] [[OFFSET_CONST_0]] + ; CHECK-LLVM: %[[BUF1_SFLN_CLOSURE_ACCESS:[0-9]+]] = getelementptr inbounds %[[CLOSURE_NAME_SFLN]], %[[CLOSURE_NAME_SFLN]] addrspace(4)* %this1, i32 0, i32 0 + %9 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon.0", %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)* %this1, i32 0, i32 0 + %10 = load i32, i32* %i, align 4, !tbaa !5 + %idxprom5 = sext i32 %10 to i64 + ; 
CHECK-LLVM: getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %[[BUF1_SFLN_CLOSURE_ACCESS]]{{.*}}, !llvm.index.group ![[BUF1_SFLN_INDEX_GROUP]] + %arrayidx6 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %9, i64 0, i64 %idxprom5, !llvm.index.group !21 + %11 = load i32, i32 addrspace(4)* %arrayidx6, align 4, !tbaa !5 + ; CHECK-SPIRV: InBoundsPtrAccessChain [[TYPE_PTR]] [[BUF2_SFLN_PRE_ADD_2_ID]] [[THIS_SFLN_LOAD]] [[OFFSET_CONST_0]] [[OFFSET_CONST_1]] + ; CHECK-LLVM: %[[BUF2_SFLN_CLOSURE_ACCESS:[0-9]+]] = getelementptr inbounds %[[CLOSURE_NAME_SFLN]], %[[CLOSURE_NAME_SFLN]] addrspace(4)* %this1, i32 0, i32 1 + %12 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon.0", %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)* %this1, i32 0, i32 1 + %13 = load i32, i32* %i, align 4, !tbaa !5 + %add7 = add nsw i32 %13, 3 + %idxprom8 = sext i32 %add7 to i64 + ; CHECK-LLVM: getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %[[BUF2_SFLN_CLOSURE_ACCESS]]{{.*}}, !llvm.index.group ![[BUF2_SFLN_INDEX_GROUP]] + %arrayidx9 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %12, i64 0, i64 %idxprom8, !llvm.index.group !20 + %14 = load i32, i32 addrspace(4)* %arrayidx9, align 4, !tbaa !5 + %add10 = add nsw i32 %11, %14 + ; CHECK-SPIRV: InBoundsPtrAccessChain [[TYPE_PTR]] [[BUF2_SFLN_PRE_STORE_ID]] [[THIS_SFLN_LOAD]] [[OFFSET_CONST_0]] [[OFFSET_CONST_1]] + ; CHECK-LLVM: %[[BUF2_SFLN_CLOSURE_ACCESS:[0-9]+]] = getelementptr inbounds %[[CLOSURE_NAME_SFLN]], %[[CLOSURE_NAME_SFLN]] addrspace(4)* %this1, i32 0, i32 1 + %15 = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon.0", %"class._ZTSZ4mainE3$_0.anon.0" addrspace(4)* %this1, i32 0, i32 1 + %16 = load i32, i32* %i, align 4, !tbaa !5 + %idxprom11 = sext i32 %16 to i64 + ; CHECK-LLVM: getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %[[BUF2_SFLN_CLOSURE_ACCESS]]{{.*}}, !llvm.index.group ![[BUF2_SFLN_INDEX_GROUP]] + %arrayidx12 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* %15, i64 0, i64 %idxprom11, !llvm.index.group !20 + %17 = load i32, i32 addrspace(4)* %arrayidx12, align 4, !tbaa !5 + %mul13 = mul nsw i32 %17, %add10 + store i32 %mul13, i32 addrspace(4)* %arrayidx12, align 4, !tbaa !5 + br label %for.inc + +for.inc: ; preds = %for.body + %18 = load i32, i32* %i, align 4, !tbaa !5 + %inc = add nsw i32 %18, 1 + store i32 %inc, i32* %i, align 4, !tbaa !5 + ; CHECK-LLVM: br label %{{.*}}, !llvm.loop ![[SFLN_MD_LOOP:[0-9]+]] + br label %for.cond, !llvm.loop !22 + +for.end: ; preds = %for.cond.cleanup + ret void +} + +attributes #0 = { convergent norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../../../tests/ivdep.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { convergent inlinehint norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } 
+attributes #3 = { nounwind } +attributes #4 = { convergent } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 4, i32 100000} +!3 = !{!"clang version 12.0.0"} +!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = !{!10, !10, i64 0} +!10 = !{!"any pointer", !7, i64 0} +; Double-nested (embedded) loop example +; +; Legacy metadata +; CHECK-LLVM-DAG: ![[IVDEP_LEGACY_SFLN_2:[0-9]+]] = !{!"llvm.loop.ivdep.safelen", i32 2} +; Inner loop +; CHECK-LLVM-DAG: ![[BUF1_EMB_INNER_IDX_GR]] = !{![[BUF1_EMB_OUTER_IDX_GR]], ![[BUF1_EMB_INNER_IDX_NODE:[0-9]+]]} +; CHECK-LLVM-DAG: ![[BUF1_EMB_INNER_IDX_NODE]] = distinct !{} +; CHECK-LLVM-DAG: ![[BUF2_EMB_INNER_IDX_GR]] = distinct !{} +; CHECK-LLVM-DAG: ![[EMB_INNER_MD_LOOP]] = distinct !{![[EMB_INNER_MD_LOOP]], ![[IVDEP_LEGACY_SFLN_2]], ![[IVDEP_INNER_EMB:[0-9]+]]} +; The next 2 directives should overlap +; CHECK-LLVM-MD-OP1-DAG: ![[IVDEP_INNER_EMB]] = !{!"llvm.loop.parallel_access_indices",{{.*}} ![[BUF1_EMB_INNER_IDX_NODE]]{{.*}}, i32 2} +; CHECK-LLVM-MD-OP2-DAG: ![[IVDEP_INNER_EMB]] = !{!"llvm.loop.parallel_access_indices",{{.*}} ![[BUF2_EMB_INNER_IDX_GR]]{{.*}}, i32 2} +; +; Outer loop +; CHECK-LLVM-DAG: ![[BUF1_EMB_OUTER_IDX_GR]] = distinct !{} +; CHECK-LLVM-DAG: ![[EMB_OUTER_MD_LOOP]] = distinct !{![[EMB_OUTER_MD_LOOP]], ![[IVDEP_OUTER_EMB:[0-9]+]]} +; CHECK-LLVM-DAG: ![[IVDEP_OUTER_EMB]] = !{!"llvm.loop.parallel_access_indices", ![[BUF1_EMB_OUTER_IDX_GR]], i32 3} +!11 = distinct !{} +!12 = !{!11, !13} +!13 = distinct !{} +!14 = distinct !{} +!15 = distinct !{!15, !16, !17} +!16 = !{!"llvm.loop.parallel_access_indices", !13, !14, i32 2} +!17 = !{!"llvm.loop.ivdep.safelen", i32 2} +!18 = distinct !{!18, !19} +!19 = !{!"llvm.loop.parallel_access_indices", !11, i32 3} +; One-dimensional loop with varying ivdep parameters example +; +; CHECK-LLVM-DAG: ![[BUF1_SFLN_INDEX_GROUP]] = distinct !{} +; CHECK-LLVM-DAG: ![[BUF2_SFLN_INDEX_GROUP]] = distinct !{} +; CHECK-LLVM-DAG: ![[SFLN_MD_LOOP]] = distinct !{![[SFLN_MD_LOOP]], ![[IVDEP_BUF2_SFLN:[0-9]+]], ![[IVDEP_BUF1_SFLN:[0-9]+]]} +; CHECK-LLVM-DAG: ![[IVDEP_BUF1_SFLN]] = !{!"llvm.loop.parallel_access_indices", ![[BUF1_SFLN_INDEX_GROUP]], i32 3} +; CHECK-LLVM-DAG: ![[IVDEP_BUF2_SFLN]] = !{!"llvm.loop.parallel_access_indices", ![[BUF2_SFLN_INDEX_GROUP]], i32 2} +!20 = distinct !{} +!21 = distinct !{} +!22 = distinct !{!22, !23, !24} +!23 = !{!"llvm.loop.parallel_access_indices", !21, i32 3} +!24 = !{!"llvm.loop.parallel_access_indices", !20, i32 2} From afa5592f2b8b544bbd6cf6dad05cc7f665d670b3 Mon Sep 17 00:00:00 2001 From: amochalo Date: Thu, 8 Oct 2020 16:35:41 +0300 Subject: [PATCH 317/321] Add llvm.ctpop* intrinsic translation Add llvm.ctpop.* translation to BitCount instruction Add test for this Signed-off-by: amochalo --- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 5 +++ llvm-spirv/test/ctpop.ll | 55 ++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 llvm-spirv/test/ctpop.ll diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 4db11b527b7c6..e37d4f737b715 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -1964,6 +1964,7 @@ bool 
LLVMToSPIRV::isKnownIntrinsic(Intrinsic::ID Id) { case Intrinsic::fabs: case Intrinsic::abs: case Intrinsic::ceil: + case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::expect: @@ -2097,6 +2098,10 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, return BM->addExtInst(STy, BM->getExtInstSetId(SPIRVEIS_OpenCL), ExtOp, Ops, BB); } + case Intrinsic::ctpop: { + return BM->addUnaryInst(OpBitCount, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + } case Intrinsic::ctlz: case Intrinsic::cttz: { SPIRVWord ExtOp = II->getIntrinsicID() == Intrinsic::ctlz ? OpenCLLIB::Clz diff --git a/llvm-spirv/test/ctpop.ll b/llvm-spirv/test/ctpop.ll new file mode 100644 index 0000000000000..d0356277e37cc --- /dev/null +++ b/llvm-spirv/test/ctpop.ll @@ -0,0 +1,55 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + + +; CHECK: BitCount {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} +; CHECK: BitCount {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} +; CHECK: BitCount {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} +; CHECK: BitCount {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} +; CHECK: BitCount {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-linux-sycldevice" + +; Function Attrs: norecurse nounwind readnone +define dso_local spir_kernel void @test(i8 %x8, i16 %x16, i32 %x32, i64 %x64, <2 x i32> %x2i32) local_unnamed_addr #0 !kernel_arg_buffer_location !5 { +entry: + %0 = tail call i8 @llvm.ctpop.i8(i8 %x8) #2 + %1 = tail call i16 @llvm.ctpop.i16(i16 %x16) #2 + %2 = tail call i32 @llvm.ctpop.i32(i32 %x32) #2 + %3 = tail call i64 @llvm.ctpop.i64(i64 %x64) #2 + %4 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x2i32) #2 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i8 @llvm.ctpop.i8(i8 ) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i16 @llvm.ctpop.i16(i16 ) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i32 @llvm.ctpop.i32(i32 ) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i64 @llvm.ctpop.i64(i64 ) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32> ) #1 + +attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!2, !2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 0} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 12.0.0 (https://github.com/c199914007/llvm.git 0051629b5f4d81af1b049da17bce0b00f03998f8)"} +!5 = !{i32 -1} From 9c26b47051cd32b6dc0d5fc9cf0f94a2b986190b Mon Sep 17 00:00:00 2001 From: Aleksander Fadeev <60697485+fadeeval@users.noreply.github.com> Date: Fri, 9 Oct 2020 16:25:20 +0300 Subject: [PATCH 318/321] Add nearbyint support (#774) * Add support for llvm.nearbyint.* intrinsic Signed-off-by: 
Aleksander Fadeev --- llvm-spirv/lib/SPIRV/SPIRVUtil.cpp | 3 ++- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 8 ++++++++ llvm-spirv/test/nearbyint.ll | 28 ++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 llvm-spirv/test/nearbyint.ll diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index ca0a8b509fda3..11562e5a35325 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -1533,7 +1533,8 @@ bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM) { switch (II->getIntrinsicID()) { case Intrinsic::fabs: case Intrinsic::ceil: - case Intrinsic::maxnum: { + case Intrinsic::maxnum: + case Intrinsic::nearbyint: { Type *Ty = II->getType(); if (II->getArgOperand(0)->getType() != Ty) return false; diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index e37d4f737b715..7a1a0f44dc592 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -1986,6 +1986,7 @@ bool LLVMToSPIRV::isKnownIntrinsic(Intrinsic::ID Id) { case Intrinsic::fmuladd: case Intrinsic::memset: case Intrinsic::memcpy: + case Intrinsic::nearbyint: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::dbg_declare: @@ -2316,6 +2317,13 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, Size = 0; return BM->addLifetimeInst(OC, transValue(II->getOperand(1), BB), Size, BB); } + case Intrinsic::nearbyint: { + if (!checkTypeForSPIRVExtendedInstLowering(II, BM)) + break; + return BM->addExtInst(transType(II->getType()), + BM->getExtInstSetId(SPIRVEIS_OpenCL), OpenCLLIB::Rint, + {transValue(II->getOperand(0), BB)}, BB); + } // We don't want to mix translation of regular code and debug info, because // it creates a mess, therefore translation of debug intrinsics is // postponed until LLVMToSPIRVDbgTran::finalizeDebug...() methods. 
diff --git a/llvm-spirv/test/nearbyint.ll b/llvm-spirv/test/nearbyint.ll new file mode 100644 index 0000000000000..4d880d8062685 --- /dev/null +++ b/llvm-spirv/test/nearbyint.ll @@ -0,0 +1,28 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir" + +; CHECK: ExtInst {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} rint + +; Function Attrs: nounwind readnone +define dso_local spir_func float @foo(float %x) local_unnamed_addr #0 { +entry: + %0 = tail call float @llvm.nearbyint.f32(float %x) + ret float %0 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.nearbyint.f32(float) #1 + +attributes #0 = { nounwind readnone "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable willreturn } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git 68e1a8d20795802077987529e1268c184d749564)"} \ No newline at end of file From d8d8bb5b462af9e0be9e263ad000b4ae3631d769 Mon Sep 17 00:00:00 2001 From: Aleksander Fadeev <60697485+fadeeval@users.noreply.github.com> Date: Mon, 12 Oct 2020 10:58:49 +0300 Subject: [PATCH 319/321] Add ReadNone attr for Builtin functions (#768) * Add ReadNone attr for Builtin functions Signed-off-by: Aleksander Fadeev --- llvm-spirv/lib/SPIRV/SPIRVReader.cpp | 33 +++++++++++++ llvm-spirv/lib/SPIRV/SPIRVReader.h | 7 +++ .../builtin_function_readnone_attr.ll | 48 +++++++++++++++++++ 3 files changed, 88 insertions(+) create mode 100644 llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index 090c0d932f6a2..40669cad235be 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -4256,6 +4256,8 @@ Instruction *SPIRVToLLVM::transOCLBuiltinFromExtInst(SPIRVExtInst *BC, F->setCallingConv(CallingConv::SPIR_FUNC); if (isFuncNoUnwind()) F->addFnAttr(Attribute::NoUnwind); + if (isFuncReadNone(UnmangledName)) + F->addFnAttr(Attribute::ReadNone); } auto Args = transValue(BC->getValues(BArgs), F, BB); SPIRVDBG(dbgs() << "[transOCLBuiltinFromExtInst] Function: " << *F @@ -4567,3 +4569,34 @@ bool llvm::getSpecConstInfo(std::istream &IS, } return !IS.fail(); } + +// clang-format off +const StringSet<> SPIRVToLLVM::BuiltInConstFunc { + "convert", "get_work_dim", "get_global_size", "sub_group_ballot_bit_count", + "get_global_id", "get_local_size", "get_local_id", "get_num_groups", + "get_group_id", "get_global_offset", "acos", "acosh", "acospi", + "asin", "asinh", "asinpi", "atan", "atan2", "atanh", "atanpi", + "atan2pi", "cbrt", "ceil", "copysign", "cos", "cosh", "cospi", + "erfc", "erf", "exp", "exp2", "exp10", "expm1", "fabs", "fdim", + "floor", "fma", "fmax", "fmin", "fmod", "ilogb", "ldexp", "lgamma", + "log", "log2", "log10", "log1p", "logb", "mad", "maxmag", "minmag", + "nan", "nextafter", "pow", "pown", "powr", "remainder", "rint", + "rootn", "round", "rsqrt", "sin", "sinh", "sinpi", 
"sqrt", "tan", + "tanh", "tanpi", "tgamma", "trunc", "half_cos", "half_divide", "half_exp", + "half_exp2", "half_exp10", "half_log", "half_log2", "half_log10", "half_powr", + "half_recip", "half_rsqrt", "half_sin", "half_sqrt", "half_tan", "native_cos", + "native_divide", "native_exp", "native_exp2", "native_exp10", "native_log", + "native_log2", "native_log10", "native_powr", "native_recip", "native_rsqrt", + "native_sin", "native_sqrt", "native_tan", "abs", "abs_diff", "add_sat", "hadd", + "rhadd", "clamp", "clz", "mad_hi", "mad_sat", "max", "min", "mul_hi", "rotate", + "sub_sat", "upsample", "popcount", "mad24", "mul24", "degrees", "mix", "radians", + "step", "smoothstep", "sign", "cross", "dot", "distance", "length", "normalize", + "fast_distance", "fast_length", "fast_normalize", "isequal", "isnotequal", + "isgreater", "isgreaterequal", "isless", "islessequal", "islessgreater", + "isfinite", "isinf", "isnan", "isnormal", "isordered", "isunordered", "signbit", + "any", "all", "bitselect", "select", "shuffle", "shuffle2", "get_image_width", + "get_image_height", "get_image_depth", "get_image_channel_data_type", + "get_image_channel_order", "get_image_dim", "get_image_array_size", + "get_image_array_size", "sub_group_inverse_ballot", "sub_group_ballot_bit_extract", +}; +// clang-format on \ No newline at end of file diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.h b/llvm-spirv/lib/SPIRV/SPIRVReader.h index d11b74bba3140..fad4ef4c07800 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.h +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.h @@ -44,6 +44,7 @@ #include "SPIRVModule.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/GlobalValue.h" // llvm::GlobalValue::LinkageTypes namespace llvm { @@ -76,6 +77,7 @@ class SPIRVToLLVM { public: SPIRVToLLVM(Module *LLVMModule, SPIRVModule *TheSPIRVModule); + static const StringSet<> BuiltInConstFunc; std::string getOCLBuiltinName(SPIRVInstruction *BI); std::string getOCLConvertBuiltinName(SPIRVInstruction *BI); std::string getOCLGenericCastToPtrName(SPIRVInstruction *BI); @@ -220,6 +222,11 @@ class SPIRVToLLVM { // OpenCL function always has NoUnwind attribute. // Change this if it is no longer true. 
bool isFuncNoUnwind() const { return true; } + + bool isFuncReadNone(const std::string &Name) const { + return BuiltInConstFunc.count(Name); + } + bool isSPIRVCmpInstTransToLLVMInst(SPIRVInstruction *BI) const; bool isDirectlyTranslatedToOCL(Op OpCode) const; bool transOCLBuiltinsFromVariables(); diff --git a/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll b/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll new file mode 100644 index 0000000000000..5bfb5d0412cf3 --- /dev/null +++ b/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll @@ -0,0 +1,48 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: llvm-spirv -r %t.spv -o %t.bc +; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir-unknown-unknown" + +; Function Attrs: convergent nofree norecurse nounwind uwtable +define dso_local spir_kernel void @test_builtin_readnone(double* nocapture readonly %a, double* nocapture %b) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { +entry: + %0 = load double, double* %a, align 8, !tbaa !7 + %call = tail call double @_Z3expd(double %0) #2 + store double %call, double* %b, align 8, !tbaa !7 + %1 = load double, double* %a, align 8, !tbaa !7 + %call1 = tail call double @_Z3cosd(double %1) #2 + store double %call1, double* %b, align 8, !tbaa !7 + ret void +} + +; Function Attrs: convergent nounwind readnone +; CHECK-LLVM: declare{{.*}}@_Z3expd{{.*}}#[[#Attrs:]] +declare dso_local double @_Z3expd(double) local_unnamed_addr #1 + +; Function Attrs: convergent nounwind readnone +; CHECK-LLVM: declare{{.*}}@_Z3cosd{{.*}}#[[#Attrs]] +declare dso_local double @_Z3cosd(double) local_unnamed_addr #1 + +attributes #0 = { convergent nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +; CHECK-LLVM: attributes #[[#Attrs]] {{.*}} readnone +attributes #1 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent nounwind readnone } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 2, i32 0} +!2 = !{!"clang version 12.0.0 (https://github.com/intel/llvm 275e05b9dc13deb44eb7c765d23e65358d6bd077)"} +!3 = !{i32 1, i32 1} +!4 = !{!"none", !"none"} +!5 = !{!"double*", !"double*"} +!6 = !{!"", !""} +!7 = !{!8, !8, i64 0} +!8 = !{!"double", !9, i64 0} +!9 = !{!"omnipotent char", !10, i64 0} +!10 = !{!"Simple C/C++ TBAA"} From b8e0e3cb58296074aa56b0c593fc52ef13712b57 Mon 
Sep 17 00:00:00 2001 From: DmitryBushev Date: Thu, 8 Oct 2020 14:16:15 +0300 Subject: [PATCH 320/321] Implement the VectorComputeCallableFunctionINTEL decoration Add the decoration and translate it bidirectionally to and from the "VCCallable" function attribute. --- llvm-spirv/lib/SPIRV/SPIRVReader.cpp | 2 ++ llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 4 ++++ llvm-spirv/lib/SPIRV/VectorComputeUtil.h | 1 + llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h | 2 ++ .../lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h | 1 + .../lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 2 ++ llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp | 1 + .../test/callable-attribute-decoration.ll | 19 +++++++++++++++++++ 8 files changed, 32 insertions(+) create mode 100644 llvm-spirv/test/callable-attribute-decoration.ll diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index 40669cad235be..cfbafd66f0fd3 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -4018,6 +4018,8 @@ bool SPIRVToLLVM::transVectorComputeMetadata(SPIRVFunction *BF) { SPIRVWord SIMTMode = 0; if (BF->hasDecorate(DecorationSIMTCallINTEL, 0, &SIMTMode)) F->addFnAttr(kVCMetadata::VCSIMTCall, std::to_string(SIMTMode)); + if (BF->hasDecorate(DecorationVectorComputeCallableFunctionINTEL)) + F->addFnAttr(kVCMetadata::VCCallable); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 7a1a0f44dc592..03007fa29f09d 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -621,6 +621,10 @@ void LLVMToSPIRV::transVectorComputeMetadata(Function *F) { BF->addDecorate(DecorationSIMTCallINTEL, SIMTMode); } + if (Attrs.hasFnAttribute(kVCMetadata::VCCallable)) { + BF->addDecorate(DecorationVectorComputeCallableFunctionINTEL); + } + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { auto ArgNo = I->getArgNo(); diff --git a/llvm-spirv/lib/SPIRV/VectorComputeUtil.h b/llvm-spirv/lib/SPIRV/VectorComputeUtil.h index 817e2d6148b37..835764329eaab 100755 --- a/llvm-spirv/lib/SPIRV/VectorComputeUtil.h +++ b/llvm-spirv/lib/SPIRV/VectorComputeUtil.h @@ -110,6 +110,7 @@ const static char VCByteOffset[] = "VCByteOffset"; const static char VCSIMTCall[] = "VCSIMTCall"; const static char VCArgumentKind[] = "VCArgumentKind"; const static char VCArgumentDesc[] = "VCArgumentDesc"; +const static char VCCallable[] = "VCCallable"; } // namespace kVCMetadata namespace kVCType { diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h index f3e1a932189c6..f15cc8720ac62 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h @@ -412,6 +412,8 @@ template <> inline void SPIRVMap<Decoration, SPIRVCapVec>::init() { {CapabilityFunctionFloatControlINTEL}); ADD_VEC_INIT(DecorationFunctionFloatingPointModeINTEL, {CapabilityFunctionFloatControlINTEL}); + ADD_VEC_INIT(DecorationVectorComputeCallableFunctionINTEL, + {CapabilityVectorComputeINTEL}); } template <> inline void SPIRVMap::init() { diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h index f9062c9ed2f66..6291da2b2583c 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h @@ -439,6 +439,7 @@ inline bool isValid(spv::Decoration V) { case DecorationFunctionRoundingModeINTEL: case DecorationFunctionDenormModeINTEL: case
DecorationFunctionFloatingPointModeINTEL: + case DecorationVectorComputeCallableFunctionINTEL: return true; default: return false; diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index 23f6eb18219b1..56c4187cdb709 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -163,6 +163,8 @@ template <> inline void SPIRVMap<Decoration, std::string>::init() { add(DecorationFunctionDenormModeINTEL, "FunctionDenormModeINTEL"); add(DecorationFunctionFloatingPointModeINTEL, "FunctionFloatingPointModeINTEL"); + add(DecorationVectorComputeCallableFunctionINTEL, + "VectorComputeCallableFunctionINTEL"); add(DecorationMax, "Max"); } SPIRV_DEF_NAMEMAP(Decoration, SPIRVDecorationNameMap) diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp b/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp index 8d1a43a77fcd8..8c8e4db48e04f 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp +++ b/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp @@ -523,6 +523,7 @@ enum Decoration { DecorationBufferLocationINTEL = 5921, DecorationIOPipeStorageINTEL = 5944, DecorationFunctionFloatingPointModeINTEL = 6080, + DecorationVectorComputeCallableFunctionINTEL = 6087, DecorationMax = 0x7fffffff, }; diff --git a/llvm-spirv/test/callable-attribute-decoration.ll b/llvm-spirv/test/callable-attribute-decoration.ll new file mode 100644 index 0000000000000..2abdfd632ff0f --- /dev/null +++ b/llvm-spirv/test/callable-attribute-decoration.ll @@ -0,0 +1,19 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o %t.spv --spirv-ext=+SPV_INTEL_vector_compute +; RUN: llvm-spirv %t.spv -o %t.spt --to-text +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.spv -o %t.bc -r +; RUN: llvm-dis %t.bc -o %t.ll +; RUN: FileCheck < %t.ll %s --check-prefix=CHECK-LLVM +target triple = "spir64" + + +define dso_local <4 x i32> @foo(<4 x i32> %a, <4 x i32> %b) #0 { +entry: + ret <4 x i32> %a +} +; CHECK-SPIRV: 3 Decorate {{[0-9]+}} VectorComputeCallableFunctionINTEL +; CHECK-LLVM: attributes +; CHECK-LLVM-SAME: "VCCallable" + +attributes #0 = { "VCCallable" "VCFunction" } From 97d7eec5a71e0b28f688600f3666ae6eaff0403d Mon Sep 17 00:00:00 2001 From: Artem Gindinson Date: Tue, 20 Oct 2020 14:29:14 +0300 Subject: [PATCH 321/321] Translate the llvm.fshl intrinsic function "Funnel shift left" doesn't have an analogue in the OpenCL ExtInst set. We unroll `llvm.fshl.i*(i*, i*, i*)` into a small algorithm that performs the actual funnel shift. A detailed description of FSHL can be found at https://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic
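To make the semantics concrete, a worked example (derived from the LangRef definition linked above, not from code in this patch): for iN operands, fshl(a, b, r) concatenates a and b with a in the more significant half, shifts the 2N-bit value left by r modulo N, and returns the top N bits. That is, it yields a when r % N == 0, and (a << (r % N)) | (b >> (N - r % N)) otherwise. For i32, fshl(0x000000FF, 0xAB000000, 8) == 0x0000FFAB: the 8 most significant bits of b are funneled in behind the low 8 bits of a.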
Signed-off-by: Artem Gindinson --- llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp | 105 +++++++++++++++++-- llvm-spirv/test/llvm.fshl.ll | 63 +++++++++++ 2 files changed, 161 insertions(+), 7 deletions(-) create mode 100644 llvm-spirv/test/llvm.fshl.ll diff --git a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp index b63dc2ce21e58..229112de597e5 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp @@ -85,6 +85,20 @@ class SPIRVRegularizeLLVM : public ModulePass { /// @spirv.llvm_memset_* and replace it with @llvm.memset. void lowerMemset(MemSetInst *MSI); + /// No SPIR-V counterpart for @llvm.fshl.i* intrinsic. It will be lowered + /// to a newly generated @spirv.llvm_fshl_i* function. + /// Conceptually, FSHL: + /// 1. concatenates the ints, the first one being the more significant; + /// 2. performs a left shift-rotate on the resulting double-width int; + /// 3. returns the most significant bits of the shift-rotate result, + /// the number of bits being equal to the size of the original integers. + /// The actual implementation algorithm will be slightly different to speed + /// things up. + void lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic); + void buildFunnelShiftLeftFunc(Function *FSHLFunc); + + static std::string lowerLLVMIntrinsicName(IntrinsicInst *II); + static char ID; private: @@ -94,17 +108,22 @@ class SPIRVRegularizeLLVM : public ModulePass { char SPIRVRegularizeLLVM::ID = 0; -void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) { - if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength())) - return; // To be handled in LLVMToSPIRV::transIntrinsicInst - Function *IntrinsicFunc = MSI->getCalledFunction(); +std::string SPIRVRegularizeLLVM::lowerLLVMIntrinsicName(IntrinsicInst *II) { + Function *IntrinsicFunc = II->getCalledFunction(); assert(IntrinsicFunc && "Missing function"); std::string FuncName = IntrinsicFunc->getName().str(); std::replace(FuncName.begin(), FuncName.end(), '.', '_'); FuncName = "spirv." + FuncName; + return FuncName; +} + +void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) { + if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength())) + return; // To be handled in LLVMToSPIRV::transIntrinsicInst + + std::string FuncName = lowerLLVMIntrinsicName(MSI); if (MSI->isVolatile()) FuncName += ".volatile"; - // Redirect @llvm.memset.* call to @spirv.llvm_memset_* Function *F = M->getFunction(FuncName); if (F) { @@ -137,6 +156,75 @@ void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) { return; } +void SPIRVRegularizeLLVM::buildFunnelShiftLeftFunc(Function *FSHLFunc) { + if (!FSHLFunc->empty()) + return; + + auto *IntTy = dyn_cast<IntegerType>(FSHLFunc->getReturnType()); + assert(IntTy && "llvm.fshl: expected an integer return type"); + assert(FSHLFunc->arg_size() == 3 && "llvm.fshl: expected 3 arguments"); + for (Argument &Arg : FSHLFunc->args()) + assert(Arg.getType()->getTypeID() == IntTy->getTypeID() && + "llvm.fshl: mismatched return type and argument types"); + + // Our function will require 3 basic blocks; the purpose of each will be + // clarified below. + auto *CondBB = BasicBlock::Create(M->getContext(), "cond", FSHLFunc); + auto *RotateBB = + BasicBlock::Create(M->getContext(), "rotate", FSHLFunc); // Main logic + auto *PhiBB = BasicBlock::Create(M->getContext(), "phi", FSHLFunc); + + IRBuilder<> Builder(CondBB); + // If the number of bits to rotate by is divisible by the bit width, + // the shift becomes useless, and we should bypass the main logic in that + // case. + unsigned BitWidth = IntTy->getIntegerBitWidth(); + ConstantInt *BitWidthConstant = Builder.getInt({BitWidth, BitWidth}); + auto *RotateModVal = + Builder.CreateURem(/*Rotate*/ FSHLFunc->getArg(2), BitWidthConstant); + ConstantInt *ZeroConstant = Builder.getInt({BitWidth, 0}); + auto *CheckRotateModIfZero = Builder.CreateICmpEQ(RotateModVal, ZeroConstant); + Builder.CreateCondBr(CheckRotateModIfZero, /*True*/ PhiBB, + /*False*/ RotateBB); + + // Build the actual funnel shift rotate logic. + Builder.SetInsertPoint(RotateBB); + // Shift the more significant number left, the "rotate" number of bits + // will be 0-filled on the right as a result of this regular shift. + auto *ShiftLeft = Builder.CreateShl(FSHLFunc->getArg(0), RotateModVal); + // We want the "rotate" number of the second int's MSBs to occupy the + // rightmost "0 space" left by the previous operation.
Therefore, + // subtract the "rotate" number from the integer bitsize... + auto *SubRotateVal = Builder.CreateSub(BitWidthConstant, RotateModVal); + // ...and right-shift the second int by this number, zero-filling the MSBs. + auto *ShiftRight = Builder.CreateLShr(FSHLFunc->getArg(1), SubRotateVal); + // A bitwise OR of the shifted ints yields the final result; the two + // shifted values have no overlapping set bits. + auto *FunnelShiftRes = Builder.CreateOr(ShiftLeft, ShiftRight); + Builder.CreateBr(PhiBB); + + // PHI basic block. If no actual rotate was required, return the first, more + // significant int. E.g. for 32-bit integers, it's equivalent to concatenating + // the 2 ints and taking 32 MSBs. + Builder.SetInsertPoint(PhiBB); + PHINode *Phi = Builder.CreatePHI(IntTy, 0); + Phi->addIncoming(FunnelShiftRes, RotateBB); + Phi->addIncoming(FSHLFunc->getArg(0), CondBB); + Builder.CreateRet(Phi); +} + +void SPIRVRegularizeLLVM::lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic) { + // Get a separate function - otherwise, we'd have to rework the CFG of the + // current one. Then simply replace the intrinsic uses with a call to the new + // function. + FunctionType *FSHLFuncTy = FSHLIntrinsic->getFunctionType(); + Type *FSHLRetTy = FSHLFuncTy->getReturnType(); + const std::string FuncName = lowerLLVMIntrinsicName(FSHLIntrinsic); + Function *FSHLFunc = + getOrCreateFunction(M, FSHLRetTy, FSHLFuncTy->params(), FuncName); + buildFunnelShiftLeftFunc(FSHLFunc); + FSHLIntrinsic->setCalledFunction(FSHLFunc); +} + bool SPIRVRegularizeLLVM::runOnModule(Module &Module) { M = &Module; Ctx = &M->getContext(); @@ -170,8 +258,11 @@ bool SPIRVRegularizeLLVM::regularize() { Function *CF = Call->getCalledFunction(); if (CF && CF->isIntrinsic()) { removeFnAttr(Call, Attribute::NoUnwind); - if (auto *MSI = dyn_cast<MemSetInst>(Call)) + auto *II = cast<IntrinsicInst>(Call); + if (auto *MSI = dyn_cast<MemSetInst>(II)) lowerMemset(MSI); + else if (II->getIntrinsicID() == Intrinsic::fshl) + lowerFunnelShiftLeft(II); } } @@ -254,7 +345,7 @@ bool SPIRVRegularizeLLVM::regularize() { } } for (Instruction *V : ToErase) { - assert(V->user_empty()); + assert(V->user_empty() && "User non-empty\n"); V->eraseFromParent(); } } diff --git a/llvm-spirv/test/llvm.fshl.ll b/llvm-spirv/test/llvm.fshl.ll new file mode 100644 index 0000000000000..5177f82acbda7 --- /dev/null +++ b/llvm-spirv/test/llvm.fshl.ll @@ -0,0 +1,63 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-unknown" + +; Function Attrs: nounwind readnone +define spir_func i32 @Test(i32 %x, i32 %y) local_unnamed_addr #0 { +entry: + %0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 8) + ret i32 %0 +} + +; CHECK: TypeInt [[TYPE_INT:[0-9]+]] 32 0 +; CHECK-DAG: Constant [[TYPE_INT]] [[CONST_ROTATE:[0-9]+]] 8 +; CHECK-DAG: Constant [[TYPE_INT]] [[CONST_TYPE_SIZE:[0-9]+]] 32 +; CHECK-DAG: Constant [[TYPE_INT]] [[CONST_0:[0-9]+]] 0 +; CHECK: TypeFunction [[TYPE_ORIG_FUNC:[0-9]+]] [[TYPE_INT]] [[TYPE_INT]] [[TYPE_INT]] +; CHECK: TypeFunction [[TYPE_FSHL_FUNC:[0-9]+]] [[TYPE_INT]] [[TYPE_INT]] [[TYPE_INT]] [[TYPE_INT]] +; CHECK: TypeBool [[TYPE_BOOL:[0-9]+]] + +; CHECK: Function [[TYPE_INT]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC]] +; CHECK: FunctionParameter [[TYPE_INT]] [[X:[0-9]+]] +; CHECK: FunctionParameter [[TYPE_INT]] [[Y:[0-9]+]] +; CHECK: FunctionCall [[TYPE_INT]] [[CALL:[0-9]+]] [[FSHL_FUNC:[0-9]+]] [[X]] [[Y]]
[[CONST_ROTATE]] +; CHECK: ReturnValue [[CALL]] + +; CHECK: Function [[TYPE_INT]] [[FSHL_FUNC]] {{[0-9]+}} [[TYPE_FSHL_FUNC]] +; CHECK: FunctionParameter [[TYPE_INT]] [[X_FSHL:[0-9]+]] +; CHECK: FunctionParameter [[TYPE_INT]] [[Y_FSHL:[0-9]+]] +; CHECK: FunctionParameter [[TYPE_INT]] [[ROT:[0-9]+]] + +; CHECK: Label [[MAIN_BB:[0-9]+]] +; CHECK: UMod [[TYPE_INT]] [[ROTATE_MOD_SIZE:[0-9]+]] [[ROT]] [[CONST_TYPE_SIZE]] +; CHECK: IEqual [[TYPE_BOOL]] [[ZERO_COND:[0-9]+]] [[ROTATE_MOD_SIZE]] [[CONST_0]] +; CHECK: BranchConditional [[ZERO_COND]] [[PHI_BB:[0-9]+]] [[ROTATE_BB:[0-9]+]] + +; CHECK: Label [[ROTATE_BB]] +; CHECK: ShiftLeftLogical [[TYPE_INT]] [[X_SHIFT_LEFT:[0-9]+]] [[X_FSHL]] [[ROTATE_MOD_SIZE]] +; CHECK: ISub [[TYPE_INT]] [[NEG_ROTATE:[0-9]+]] [[CONST_TYPE_SIZE]] [[ROTATE_MOD_SIZE]] +; CHECK: ShiftRightLogical [[TYPE_INT]] [[Y_SHIFT_RIGHT:[0-9]+]] [[Y_FSHL]] [[NEG_ROTATE]] +; CHECK: BitwiseOr [[TYPE_INT]] [[FSHL_RESULT:[0-9]+]] [[X_SHIFT_LEFT]] [[Y_SHIFT_RIGHT]] +; CHECK: Branch [[PHI_BB]] + +; CHECK: Label [[PHI_BB]] +; CHECK: Phi [[TYPE_INT]] [[PHI_INST:[0-9]+]] [[FSHL_RESULT]] [[ROTATE_BB]] [[X_FSHL]] [[MAIN_BB]] +; CHECK: ReturnValue [[PHI_INST]] + +; Function Attrs: nounwind readnone speculatable willreturn +declare i32 @llvm.fshl.i32(i32, i32, i32) #1 + +attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable willreturn } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 0} +!2 = !{i32 1, i32 2}
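For reference, the helper that buildFunnelShiftLeftFunc generates for the i32 case of this test looks roughly like the LLVM IR below. This is a hand-written sketch, not output captured from the pass: the function name follows lowerLLVMIntrinsicName (dots in the intrinsic name become underscores and a "spirv." prefix is added, so llvm.fshl.i32 becomes spirv.llvm_fshl_i32), and the value names are invented for readability, where IRBuilder would emit numbered temporaries.

define i32 @spirv.llvm_fshl_i32(i32 %x, i32 %y, i32 %rot) {
cond:
  ; Bypass the rotation if the rotate amount is a multiple of the bit width.
  %rotmod = urem i32 %rot, 32
  %iszero = icmp eq i32 %rotmod, 0
  br i1 %iszero, label %phi, label %rotate

rotate:
  ; The funnel shift proper: (x << n) | (y >> (32 - n)).
  %shl = shl i32 %x, %rotmod
  %sub = sub i32 32, %rotmod
  %lshr = lshr i32 %y, %sub
  %or = or i32 %shl, %lshr
  br label %phi

phi:
  %res = phi i32 [ %or, %rotate ], [ %x, %cond ]
  ret i32 %res
}

The call in @Test is then redirected to this helper via setCalledFunction, which is what the FunctionCall line in the CHECK sequence above verifies.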