GPUOpen-Drivers
diff --git a/flang/docs/FlangDriver.md b/flang/docs/FlangDriver.md
Lines changed: 2 additions & 2 deletions
diff --git a/flang/docs/GettingStarted.md b/flang/docs/GettingStarted.md
Lines changed: 1 addition & 1 deletion
diff --git a/flang/docs/InternalProcedureTrampolines.md b/flang/docs/InternalProcedureTrampolines.md
Lines changed: 6 additions & 6 deletions
diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake
Lines changed: 1 addition & 1 deletion
diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
Lines changed: 36 additions & 28 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
Lines changed: 22 additions & 4 deletions
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h
Lines changed: 9 additions & 0 deletions
diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
Lines changed: 71 additions & 50 deletions
@@ -61,15 +61,15 @@ Note that similarly to `-Xclang` in `clang`, you can use `-Xflang` to forward a
 frontend specific flag from the _compiler_ directly to the _frontend_ driver,
 e.g.:
 
-```lang=bash
+```bash
 flang-new -Xflang -fdebug-dump-parse-tree input.f95
 ```
 
 In the invocation above, `-fdebug-dump-parse-tree` is forwarded to `flang-new
 -fc1`. Without the forwarding flag, `-Xflang`, you would see the following
 warning:
 
-```lang=bash
+```bash
 flang-new: warning: argument unused during compilation:
 ```
 
 
@@ -247,7 +247,7 @@ code.  Note that the packaging of the libraries is different
 between [Clang](https://clang.llvm.org/docs/OffloadingDesign.html#linking-target-device-code) and NVCC, so the library must be linked using
 compatible compiler drivers.
 
-### Bulding in-tree
+#### Building in-tree
 One may build Flang runtime library along with building Flang itself
 by providing these additional CMake variables on top of the Flang in-tree
 build config:
 
@@ -170,15 +170,15 @@ as an actual argument to `foo()`.
 
 The trampoline has the following structure:
 
-```assembly
+```asm
 callee_trampoline:
-  MOV <static-chain-address>, R#
-  JMP <callee-address>
+  MOV static-chain-address, R#
+  JMP callee-address
 ```
 
 Where:
-- `<callee-address>` is the address of function `callee()`.
-- `<static-chain-address>` - the address of the static chain
+- `callee-address` is the address of function `callee()`.
+- `static-chain-address` - the address of the static chain
   object created inside `host()`.
 - `R#` is a target specific register.
 
@@ -249,7 +249,7 @@ One of the options is to use separate allocations for the trampoline code
 and the trampoline "data".
 
 The trampolines may be located in non-writeable executable memory:
-```assembly
+```asm
 trampoline0:
   MOV (TDATA[0].static_chain_address), R#
   JMP (TDATA[0].callee_address)
 
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 480785
+#define LLVM_MAIN_REVISION 480794
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP
 
@@ -35,7 +35,6 @@ namespace llvm {
 /// Implements a Union-find algorithm to compute Minimum Spanning Tree
 /// for a given CFG.
 template <class Edge, class BBInfo> class CFGMST {
-public:
   Function &F;
 
   // Store all the edges in CFG. It may contain some stale edges
@@ -49,6 +48,12 @@ template <class Edge, class BBInfo> class CFGMST {
   // (For function with an infinite loop, this block may be absent)
   bool ExitBlockFound = false;
 
+  BranchProbabilityInfo *const BPI;
+  BlockFrequencyInfo *const BFI;
+
+  // Whether the function entry will always be instrumented.
+  const bool InstrumentFuncEntry;
+
   // Find the root group of the G and compress the path from G to the root.
   BBInfo *findAndCompressGroup(BBInfo *G) {
     if (G->Group != G)
@@ -77,21 +82,6 @@ template <class Edge, class BBInfo> class CFGMST {
     return true;
   }
 
-  // Give BB, return the auxiliary information.
-  BBInfo &getBBInfo(const BasicBlock *BB) const {
-    auto It = BBInfos.find(BB);
-    assert(It->second.get() != nullptr);
-    return *It->second.get();
-  }
-
-  // Give BB, return the auxiliary information if it's available.
-  BBInfo *findBBInfo(const BasicBlock *BB) const {
-    auto It = BBInfos.find(BB);
-    if (It == BBInfos.end())
-      return nullptr;
-    return It->second.get();
-  }
-
   // Traverse the CFG using a stack. Find all the edges and assign the weight.
   // Edges with large weight will be put into MST first so they are less likely
   // to be instrumented.
@@ -236,6 +226,7 @@ template <class Edge, class BBInfo> class CFGMST {
     }
   }
 
+public:
   // Dump the Debug information about the instrumentation.
   void dumpEdges(raw_ostream &OS, const Twine &Message) const {
     if (!Message.str().empty())
@@ -274,25 +265,42 @@ template <class Edge, class BBInfo> class CFGMST {
     return *AllEdges.back();
   }
 
-  BranchProbabilityInfo *BPI;
-  BlockFrequencyInfo *BFI;
-
-  // If function entry will be always instrumented.
-  bool InstrumentFuncEntry;
-
-public:
-  CFGMST(Function &Func, bool InstrumentFuncEntry_,
-         BranchProbabilityInfo *BPI_ = nullptr,
-         BlockFrequencyInfo *BFI_ = nullptr)
-      : F(Func), BPI(BPI_), BFI(BFI_),
-        InstrumentFuncEntry(InstrumentFuncEntry_) {
+  CFGMST(Function &Func, bool InstrumentFuncEntry,
+         BranchProbabilityInfo *BPI = nullptr,
+         BlockFrequencyInfo *BFI = nullptr)
+      : F(Func), BPI(BPI), BFI(BFI), InstrumentFuncEntry(InstrumentFuncEntry) {
     buildEdges();
     sortEdgesByWeight();
     computeMinimumSpanningTree();
     if (AllEdges.size() > 1 && InstrumentFuncEntry)
       std::iter_swap(std::move(AllEdges.begin()),
                      std::move(AllEdges.begin() + AllEdges.size() - 1));
   }
+
+  const std::vector<std::unique_ptr<Edge>> &allEdges() const {
+    return AllEdges;
+  }
+
+  std::vector<std::unique_ptr<Edge>> &allEdges() { return AllEdges; }
+
+  size_t numEdges() const { return AllEdges.size(); }
+
+  size_t bbInfoSize() const { return BBInfos.size(); }
+
+  // Given BB, return its auxiliary information; BB must be in BBInfos.
+  BBInfo &getBBInfo(const BasicBlock *BB) const {
+    auto It = BBInfos.find(BB);
+    assert(It->second.get() != nullptr);
+    return *It->second.get();
+  }
+
+  // Given BB, return its auxiliary information, or nullptr if unavailable.
+  BBInfo *findBBInfo(const BasicBlock *BB) const {
+    auto It = BBInfos.find(BB);
+    if (It == BBInfos.end())
+      return nullptr;
+    return It->second.get();
+  }
 };
 
 } // end namespace llvm
 
@@ -8081,7 +8081,8 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   // For more information, see section F.3 of the 2.06 ISA specification.
   // With ISA 3.0
   if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
-      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
+      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
+      ResVT == MVT::f128)
     return Op;
 
   // If the RHS of the comparison is a 0.0, we don't need to do the
 
@@ -110,7 +110,9 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
 }
 
 // TODO: Make this more like AArch64?
-static bool onlyUsesFP(const MachineInstr &MI) {
+bool RISCVRegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+                                       const MachineRegisterInfo &MRI,
+                                       const TargetRegisterInfo &TRI) const {
   switch (MI.getOpcode()) {
   case TargetOpcode::G_FADD:
   case TargetOpcode::G_FSUB:
@@ -131,11 +133,19 @@ static bool onlyUsesFP(const MachineInstr &MI) {
     break;
   }
 
+  // If we have a copy instruction, we could be feeding floating point
+  // instructions.
+  if (MI.getOpcode() == TargetOpcode::COPY)
+    return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
+           &RISCV::FPRBRegBank;
+
   return false;
 }
 
 // TODO: Make this more like AArch64?
-static bool onlyDefinesFP(const MachineInstr &MI) {
+bool RISCVRegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+                                          const MachineRegisterInfo &MRI,
+                                          const TargetRegisterInfo &TRI) const {
   switch (MI.getOpcode()) {
   case TargetOpcode::G_FADD:
   case TargetOpcode::G_FSUB:
@@ -156,6 +166,12 @@ static bool onlyDefinesFP(const MachineInstr &MI) {
     break;
   }
 
+  // If we have a copy instruction, we could be fed by floating point
+  // instructions.
+  if (MI.getOpcode() == TargetOpcode::COPY)
+    return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
+           &RISCV::FPRBRegBank;
+
   return false;
 }
 
@@ -173,6 +189,8 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
 
   const MachineFunction &MF = *MI.getParent()->getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
 
   unsigned GPRSize = getMaximumSize(RISCV::GPRBRegBankID);
   assert((GPRSize == 32 || GPRSize == 64) && "Unexpected GPR size");
@@ -235,7 +253,7 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
                  // assume this was a floating point load in the IR. If it was
                  // not, we would have had a bitcast before reaching that
                  // instruction.
-                 return onlyUsesFP(UseMI);
+                 return onlyUsesFP(UseMI, MRI, TRI);
                })) {
       OperandsMapping = getOperandsMapping(
           {getFPValueMapping(Ty.getSizeInBits()), GPRValueMapping});
@@ -254,7 +272,7 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     }
 
     MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(0).getReg());
-    if (onlyDefinesFP(*DefMI)) {
+    if (onlyDefinesFP(*DefMI, MRI, TRI)) {
       OperandsMapping = getOperandsMapping(
           {getFPValueMapping(Ty.getSizeInBits()), GPRValueMapping});
     }
 
@@ -38,6 +38,15 @@ class RISCVRegisterBankInfo final : public RISCVGenRegisterBankInfo {
 
   const InstructionMapping &
   getInstrMapping(const MachineInstr &MI) const override;
+
+private:
+  /// \returns true if \p MI only uses FPRs.
+  bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                  const TargetRegisterInfo &TRI) const;
+
+  /// \returns true if \p MI only defines FPRs.
+  bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                     const TargetRegisterInfo &TRI) const;
 };
 } // end namespace llvm
 #endif
@@ -20,56 +20,77 @@
 
 namespace llvm {
 namespace X86Disassembler {
-
-// Accessor functions for various fields of an Intel instruction
-#define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6)
-#define regFromModRM(modRM)  (((modRM) & 0x38) >> 3)
-#define rmFromModRM(modRM)   ((modRM) & 0x7)
-#define scaleFromSIB(sib)    (((sib) & 0xc0) >> 6)
-#define indexFromSIB(sib)    (((sib) & 0x38) >> 3)
-#define baseFromSIB(sib)     ((sib) & 0x7)
-#define wFromREX(rex)        (((rex) & 0x8) >> 3)
-#define rFromREX(rex)        (((rex) & 0x4) >> 2)
-#define xFromREX(rex)        (((rex) & 0x2) >> 1)
-#define bFromREX(rex)        ((rex) & 0x1)
-
-#define rFromEVEX2of4(evex)     (((~(evex)) & 0x80) >> 7)
-#define xFromEVEX2of4(evex)     (((~(evex)) & 0x40) >> 6)
-#define bFromEVEX2of4(evex)     (((~(evex)) & 0x20) >> 5)
-#define r2FromEVEX2of4(evex)    (((~(evex)) & 0x10) >> 4)
-#define mmmFromEVEX2of4(evex)   ((evex) & 0x7)
-#define wFromEVEX3of4(evex)     (((evex) & 0x80) >> 7)
-#define vvvvFromEVEX3of4(evex)  (((~(evex)) & 0x78) >> 3)
-#define ppFromEVEX3of4(evex)    ((evex) & 0x3)
-#define zFromEVEX4of4(evex)     (((evex) & 0x80) >> 7)
-#define l2FromEVEX4of4(evex)    (((evex) & 0x40) >> 6)
-#define lFromEVEX4of4(evex)     (((evex) & 0x20) >> 5)
-#define bFromEVEX4of4(evex)     (((evex) & 0x10) >> 4)
-#define v2FromEVEX4of4(evex)    (((~evex) & 0x8) >> 3)
-#define aaaFromEVEX4of4(evex)   ((evex) & 0x7)
-
-#define rFromVEX2of3(vex)       (((~(vex)) & 0x80) >> 7)
-#define xFromVEX2of3(vex)       (((~(vex)) & 0x40) >> 6)
-#define bFromVEX2of3(vex)       (((~(vex)) & 0x20) >> 5)
-#define mmmmmFromVEX2of3(vex)   ((vex) & 0x1f)
-#define wFromVEX3of3(vex)       (((vex) & 0x80) >> 7)
-#define vvvvFromVEX3of3(vex)    (((~(vex)) & 0x78) >> 3)
-#define lFromVEX3of3(vex)       (((vex) & 0x4) >> 2)
-#define ppFromVEX3of3(vex)      ((vex) & 0x3)
-
-#define rFromVEX2of2(vex)       (((~(vex)) & 0x80) >> 7)
-#define vvvvFromVEX2of2(vex)    (((~(vex)) & 0x78) >> 3)
-#define lFromVEX2of2(vex)       (((vex) & 0x4) >> 2)
-#define ppFromVEX2of2(vex)      ((vex) & 0x3)
-
-#define rFromXOP2of3(xop)       (((~(xop)) & 0x80) >> 7)
-#define xFromXOP2of3(xop)       (((~(xop)) & 0x40) >> 6)
-#define bFromXOP2of3(xop)       (((~(xop)) & 0x20) >> 5)
-#define mmmmmFromXOP2of3(xop)   ((xop) & 0x1f)
-#define wFromXOP3of3(xop)       (((xop) & 0x80) >> 7)
-#define vvvvFromXOP3of3(vex)    (((~(vex)) & 0x78) >> 3)
-#define lFromXOP3of3(xop)       (((xop) & 0x4) >> 2)
-#define ppFromXOP3of3(xop)      ((xop) & 0x3)
+// Helper macros
+#define bitFromOffset0(val) ((val) & 0x1)
+#define bitFromOffset1(val) (((val) >> 1) & 0x1)
+#define bitFromOffset2(val) (((val) >> 2) & 0x1)
+#define bitFromOffset3(val) (((val) >> 3) & 0x1)
+#define bitFromOffset4(val) (((val) >> 4) & 0x1)
+#define bitFromOffset5(val) (((val) >> 5) & 0x1)
+#define bitFromOffset6(val) (((val) >> 6) & 0x1)
+#define bitFromOffset7(val) (((val) >> 7) & 0x1)
+#define twoBitsFromOffset0(val) ((val) & 0x3)
+#define twoBitsFromOffset6(val) (((val) >> 6) & 0x3)
+#define threeBitsFromOffset0(val) ((val) & 0x7)
+#define threeBitsFromOffset3(val) (((val) >> 3) & 0x7)
+#define fiveBitsFromOffset0(val) ((val) & 0x1f)
+#define invertedBitFromOffset3(val) (((~(val)) >> 3) & 0x1)
+#define invertedBitFromOffset4(val) (((~(val)) >> 4) & 0x1)
+#define invertedBitFromOffset5(val) (((~(val)) >> 5) & 0x1)
+#define invertedBitFromOffset6(val) (((~(val)) >> 6) & 0x1)
+#define invertedBitFromOffset7(val) (((~(val)) >> 7) & 0x1)
+#define invertedFourBitsFromOffset3(val) (((~(val)) >> 3) & 0xf)
+// MOD/RM
+#define modFromModRM(modRM) twoBitsFromOffset6(modRM)
+#define regFromModRM(modRM) threeBitsFromOffset3(modRM)
+#define rmFromModRM(modRM) threeBitsFromOffset0(modRM)
+// SIB
+#define scaleFromSIB(sib) twoBitsFromOffset6(sib)
+#define indexFromSIB(sib) threeBitsFromOffset3(sib)
+#define baseFromSIB(sib) threeBitsFromOffset0(sib)
+// REX
+#define wFromREX(rex) bitFromOffset3(rex)
+#define rFromREX(rex) bitFromOffset2(rex)
+#define xFromREX(rex) bitFromOffset1(rex)
+#define bFromREX(rex) bitFromOffset0(rex)
+// XOP
+#define rFromXOP2of3(xop) invertedBitFromOffset7(xop)
+#define xFromXOP2of3(xop) invertedBitFromOffset6(xop)
+#define bFromXOP2of3(xop) invertedBitFromOffset5(xop)
+#define mmmmmFromXOP2of3(xop) fiveBitsFromOffset0(xop)
+#define wFromXOP3of3(xop) bitFromOffset7(xop)
+#define vvvvFromXOP3of3(xop) invertedFourBitsFromOffset3(xop)
+#define lFromXOP3of3(xop) bitFromOffset2(xop)
+#define ppFromXOP3of3(xop) twoBitsFromOffset0(xop)
+// VEX2
+#define rFromVEX2of2(vex) invertedBitFromOffset7(vex)
+#define vvvvFromVEX2of2(vex) invertedFourBitsFromOffset3(vex)
+#define lFromVEX2of2(vex) bitFromOffset2(vex)
+#define ppFromVEX2of2(vex) twoBitsFromOffset0(vex)
+// VEX3
+#define rFromVEX2of3(vex) invertedBitFromOffset7(vex)
+#define xFromVEX2of3(vex) invertedBitFromOffset6(vex)
+#define bFromVEX2of3(vex) invertedBitFromOffset5(vex)
+#define mmmmmFromVEX2of3(vex) fiveBitsFromOffset0(vex)
+#define wFromVEX3of3(vex) bitFromOffset7(vex)
+#define vvvvFromVEX3of3(vex) invertedFourBitsFromOffset3(vex)
+#define lFromVEX3of3(vex) bitFromOffset2(vex)
+#define ppFromVEX3of3(vex) twoBitsFromOffset0(vex)
+// EVEX
+#define rFromEVEX2of4(evex) invertedBitFromOffset7(evex)
+#define xFromEVEX2of4(evex) invertedBitFromOffset6(evex)
+#define bFromEVEX2of4(evex) invertedBitFromOffset5(evex)
+#define r2FromEVEX2of4(evex) invertedBitFromOffset4(evex)
+#define mmmFromEVEX2of4(evex) threeBitsFromOffset0(evex)
+#define wFromEVEX3of4(evex) bitFromOffset7(evex)
+#define vvvvFromEVEX3of4(evex) invertedFourBitsFromOffset3(evex)
+#define ppFromEVEX3of4(evex) twoBitsFromOffset0(evex)
+#define zFromEVEX4of4(evex) bitFromOffset7(evex)
+#define l2FromEVEX4of4(evex) bitFromOffset6(evex)
+#define lFromEVEX4of4(evex) bitFromOffset5(evex)
+#define bFromEVEX4of4(evex) bitFromOffset4(evex)
+#define v2FromEVEX4of4(evex) invertedBitFromOffset3(evex)
+#define aaaFromEVEX4of4(evex) threeBitsFromOffset0(evex)
 
 // These enums represent Intel registers for use by the decoder.
 #define REGS_8BIT     \