Skip to content

Commit bedf99a

Browse files
SC llvm teamSC llvm team
SC llvm team
authored and
SC llvm team
committed
Merged main:3f743fd3a319 into amd-gfx:fa9cd7924a4e
Local branch amd-gfx fa9cd79 Merged main:1f6eb3ca5cb1 into amd-gfx:8cdc496857f9 Remote branch main 3f743fd [Flang][Docs] Fix lexer issue in Driver and Trampoline doc (llvm#72322)
2 parents fa9cd79 + 3f743fd commit bedf99a

File tree

17 files changed

+248
-153
lines changed

17 files changed

+248
-153
lines changed

flang/docs/FlangDriver.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,15 @@ Note that similarly to `-Xclang` in `clang`, you can use `-Xflang` to forward a
6161
frontend specific flag from the _compiler_ directly to the _frontend_ driver,
6262
e.g.:
6363

64-
```lang=bash
64+
```bash
6565
flang-new -Xflang -fdebug-dump-parse-tree input.f95
6666
```
6767

6868
In the invocation above, `-fdebug-dump-parse-tree` is forwarded to `flang-new
6969
-fc1`. Without the forwarding flag, `-Xflang`, you would see the following
7070
warning:
7171

72-
```lang=bash
72+
```bash
7373
flang-new: warning: argument unused during compilation:
7474
```
7575

flang/docs/GettingStarted.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ code. Note that the packaging of the libraries is different
247247
between [Clang](https://clang.llvm.org/docs/OffloadingDesign.html#linking-target-device-code) and NVCC, so the library must be linked using
248248
compatible compiler drivers.
249249

250-
### Bulding in-tree
250+
#### Building in-tree
251251
One may build the Flang runtime library along with building Flang itself
252252
by providing these additional CMake variables on top of the Flang in-tree
253253
build config:

flang/docs/InternalProcedureTrampolines.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -170,15 +170,15 @@ as an actual argument to `foo()`.
170170

171171
The trampoline has the following structure:
172172

173-
```assembly
173+
```asm
174174
callee_trampoline:
175-
MOV <static-chain-address>, R#
176-
JMP <callee-address>
175+
MOV static-chain-address, R#
176+
JMP callee-address
177177
```
178178

179179
Where:
180-
- `<callee-address>` is the address of function `callee()`.
181-
- `<static-chain-address>` - the address of the static chain
180+
- `callee-address` is the address of function `callee()`.
181+
- `static-chain-address` - the address of the static chain
182182
object created inside `host()`.
183183
- `R#` is a target specific register.
184184

@@ -249,7 +249,7 @@ One of the options is to use separate allocations for the trampoline code
249249
and the trampoline "data".
250250
251251
The trampolines may be located in non-writeable executable memory:
252-
```assembly
252+
```asm
253253
trampoline0:
254254
MOV (TDATA[0].static_chain_address), R#
255255
JMP (TDATA[0].callee_address)

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 480785
19+
#define LLVM_MAIN_REVISION 480794
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/Transforms/Instrumentation/CFGMST.h

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ namespace llvm {
3535
/// Implements a Union-find algorithm to compute Minimum Spanning Tree
3636
/// for a given CFG.
3737
template <class Edge, class BBInfo> class CFGMST {
38-
public:
3938
Function &F;
4039

4140
// Store all the edges in CFG. It may contain some stale edges
@@ -49,6 +48,12 @@ template <class Edge, class BBInfo> class CFGMST {
4948
// (For a function with an infinite loop, this block may be absent)
5049
bool ExitBlockFound = false;
5150

51+
BranchProbabilityInfo *const BPI;
52+
BlockFrequencyInfo *const BFI;
53+
54+
// Whether the function entry will always be instrumented.
55+
const bool InstrumentFuncEntry;
56+
5257
// Find the root group of the G and compress the path from G to the root.
5358
BBInfo *findAndCompressGroup(BBInfo *G) {
5459
if (G->Group != G)
@@ -77,21 +82,6 @@ template <class Edge, class BBInfo> class CFGMST {
7782
return true;
7883
}
7984

80-
// Give BB, return the auxiliary information.
81-
BBInfo &getBBInfo(const BasicBlock *BB) const {
82-
auto It = BBInfos.find(BB);
83-
assert(It->second.get() != nullptr);
84-
return *It->second.get();
85-
}
86-
87-
// Give BB, return the auxiliary information if it's available.
88-
BBInfo *findBBInfo(const BasicBlock *BB) const {
89-
auto It = BBInfos.find(BB);
90-
if (It == BBInfos.end())
91-
return nullptr;
92-
return It->second.get();
93-
}
94-
9585
// Traverse the CFG using a stack. Find all the edges and assign the weight.
9686
// Edges with large weight will be put into MST first so they are less likely
9787
// to be instrumented.
@@ -236,6 +226,7 @@ template <class Edge, class BBInfo> class CFGMST {
236226
}
237227
}
238228

229+
public:
239230
// Dump the Debug information about the instrumentation.
240231
void dumpEdges(raw_ostream &OS, const Twine &Message) const {
241232
if (!Message.str().empty())
@@ -274,25 +265,42 @@ template <class Edge, class BBInfo> class CFGMST {
274265
return *AllEdges.back();
275266
}
276267

277-
BranchProbabilityInfo *BPI;
278-
BlockFrequencyInfo *BFI;
279-
280-
// If function entry will be always instrumented.
281-
bool InstrumentFuncEntry;
282-
283-
public:
284-
CFGMST(Function &Func, bool InstrumentFuncEntry_,
285-
BranchProbabilityInfo *BPI_ = nullptr,
286-
BlockFrequencyInfo *BFI_ = nullptr)
287-
: F(Func), BPI(BPI_), BFI(BFI_),
288-
InstrumentFuncEntry(InstrumentFuncEntry_) {
268+
CFGMST(Function &Func, bool InstrumentFuncEntry,
269+
BranchProbabilityInfo *BPI = nullptr,
270+
BlockFrequencyInfo *BFI = nullptr)
271+
: F(Func), BPI(BPI), BFI(BFI), InstrumentFuncEntry(InstrumentFuncEntry) {
289272
buildEdges();
290273
sortEdgesByWeight();
291274
computeMinimumSpanningTree();
292275
if (AllEdges.size() > 1 && InstrumentFuncEntry)
293276
std::iter_swap(std::move(AllEdges.begin()),
294277
std::move(AllEdges.begin() + AllEdges.size() - 1));
295278
}
279+
280+
const std::vector<std::unique_ptr<Edge>> &allEdges() const {
281+
return AllEdges;
282+
}
283+
284+
std::vector<std::unique_ptr<Edge>> &allEdges() { return AllEdges; }
285+
286+
size_t numEdges() const { return AllEdges.size(); }
287+
288+
size_t bbInfoSize() const { return BBInfos.size(); }
289+
290+
// Given BB, return the auxiliary information.
291+
BBInfo &getBBInfo(const BasicBlock *BB) const {
292+
auto It = BBInfos.find(BB);
293+
assert(It->second.get() != nullptr);
294+
return *It->second.get();
295+
}
296+
297+
// Given BB, return the auxiliary information if it's available.
298+
BBInfo *findBBInfo(const BasicBlock *BB) const {
299+
auto It = BBInfos.find(BB);
300+
if (It == BBInfos.end())
301+
return nullptr;
302+
return It->second.get();
303+
}
296304
};
297305

298306
} // end namespace llvm

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8081,7 +8081,8 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
80818081
// For more information, see section F.3 of the 2.06 ISA specification.
80828082
// With ISA 3.0
80838083
if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8084-
(!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
8084+
(!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8085+
ResVT == MVT::f128)
80858086
return Op;
80868087

80878088
// If the RHS of the comparison is a 0.0, we don't need to do the

llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,9 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
110110
}
111111

112112
// TODO: Make this more like AArch64?
113-
static bool onlyUsesFP(const MachineInstr &MI) {
113+
bool RISCVRegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
114+
const MachineRegisterInfo &MRI,
115+
const TargetRegisterInfo &TRI) const {
114116
switch (MI.getOpcode()) {
115117
case TargetOpcode::G_FADD:
116118
case TargetOpcode::G_FSUB:
@@ -131,11 +133,19 @@ static bool onlyUsesFP(const MachineInstr &MI) {
131133
break;
132134
}
133135

136+
// If we have a copy instruction, we could be feeding floating point
137+
// instructions.
138+
if (MI.getOpcode() == TargetOpcode::COPY)
139+
return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
140+
&RISCV::FPRBRegBank;
141+
134142
return false;
135143
}
136144

137145
// TODO: Make this more like AArch64?
138-
static bool onlyDefinesFP(const MachineInstr &MI) {
146+
bool RISCVRegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
147+
const MachineRegisterInfo &MRI,
148+
const TargetRegisterInfo &TRI) const {
139149
switch (MI.getOpcode()) {
140150
case TargetOpcode::G_FADD:
141151
case TargetOpcode::G_FSUB:
@@ -156,6 +166,12 @@ static bool onlyDefinesFP(const MachineInstr &MI) {
156166
break;
157167
}
158168

169+
// If we have a copy instruction, we could be fed by floating point
170+
// instructions.
171+
if (MI.getOpcode() == TargetOpcode::COPY)
172+
return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
173+
&RISCV::FPRBRegBank;
174+
159175
return false;
160176
}
161177

@@ -173,6 +189,8 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
173189

174190
const MachineFunction &MF = *MI.getParent()->getParent();
175191
const MachineRegisterInfo &MRI = MF.getRegInfo();
192+
const TargetSubtargetInfo &STI = MF.getSubtarget();
193+
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
176194

177195
unsigned GPRSize = getMaximumSize(RISCV::GPRBRegBankID);
178196
assert((GPRSize == 32 || GPRSize == 64) && "Unexpected GPR size");
@@ -235,7 +253,7 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
235253
// assume this was a floating point load in the IR. If it was
236254
// not, we would have had a bitcast before reaching that
237255
// instruction.
238-
return onlyUsesFP(UseMI);
256+
return onlyUsesFP(UseMI, MRI, TRI);
239257
})) {
240258
OperandsMapping = getOperandsMapping(
241259
{getFPValueMapping(Ty.getSizeInBits()), GPRValueMapping});
@@ -254,7 +272,7 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
254272
}
255273

256274
MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(0).getReg());
257-
if (onlyDefinesFP(*DefMI)) {
275+
if (onlyDefinesFP(*DefMI, MRI, TRI)) {
258276
OperandsMapping = getOperandsMapping(
259277
{getFPValueMapping(Ty.getSizeInBits()), GPRValueMapping});
260278
}

llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,15 @@ class RISCVRegisterBankInfo final : public RISCVGenRegisterBankInfo {
3838

3939
const InstructionMapping &
4040
getInstrMapping(const MachineInstr &MI) const override;
41+
42+
private:
43+
/// \returns true if \p MI only uses FPRs.
44+
bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
45+
const TargetRegisterInfo &TRI) const;
46+
47+
/// \returns true if \p MI only defines FPRs.
48+
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
49+
const TargetRegisterInfo &TRI) const;
4150
};
4251
} // end namespace llvm
4352
#endif

llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h

Lines changed: 71 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -20,56 +20,77 @@
2020

2121
namespace llvm {
2222
namespace X86Disassembler {
23-
24-
// Accessor functions for various fields of an Intel instruction
25-
#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
26-
#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
27-
#define rmFromModRM(modRM) ((modRM) & 0x7)
28-
#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
29-
#define indexFromSIB(sib) (((sib) & 0x38) >> 3)
30-
#define baseFromSIB(sib) ((sib) & 0x7)
31-
#define wFromREX(rex) (((rex) & 0x8) >> 3)
32-
#define rFromREX(rex) (((rex) & 0x4) >> 2)
33-
#define xFromREX(rex) (((rex) & 0x2) >> 1)
34-
#define bFromREX(rex) ((rex) & 0x1)
35-
36-
#define rFromEVEX2of4(evex) (((~(evex)) & 0x80) >> 7)
37-
#define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6)
38-
#define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5)
39-
#define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4)
40-
#define mmmFromEVEX2of4(evex) ((evex) & 0x7)
41-
#define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7)
42-
#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3)
43-
#define ppFromEVEX3of4(evex) ((evex) & 0x3)
44-
#define zFromEVEX4of4(evex) (((evex) & 0x80) >> 7)
45-
#define l2FromEVEX4of4(evex) (((evex) & 0x40) >> 6)
46-
#define lFromEVEX4of4(evex) (((evex) & 0x20) >> 5)
47-
#define bFromEVEX4of4(evex) (((evex) & 0x10) >> 4)
48-
#define v2FromEVEX4of4(evex) (((~evex) & 0x8) >> 3)
49-
#define aaaFromEVEX4of4(evex) ((evex) & 0x7)
50-
51-
#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
52-
#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
53-
#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
54-
#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
55-
#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
56-
#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
57-
#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
58-
#define ppFromVEX3of3(vex) ((vex) & 0x3)
59-
60-
#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
61-
#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
62-
#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
63-
#define ppFromVEX2of2(vex) ((vex) & 0x3)
64-
65-
#define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7)
66-
#define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6)
67-
#define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5)
68-
#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)
69-
#define wFromXOP3of3(xop) (((xop) & 0x80) >> 7)
70-
#define vvvvFromXOP3of3(vex) (((~(vex)) & 0x78) >> 3)
71-
#define lFromXOP3of3(xop) (((xop) & 0x4) >> 2)
72-
#define ppFromXOP3of3(xop) ((xop) & 0x3)
23+
// Helper macros
24+
#define bitFromOffset0(val) ((val) & 0x1)
25+
#define bitFromOffset1(val) (((val) >> 1) & 0x1)
26+
#define bitFromOffset2(val) (((val) >> 2) & 0x1)
27+
#define bitFromOffset3(val) (((val) >> 3) & 0x1)
28+
#define bitFromOffset4(val) (((val) >> 4) & 0x1)
29+
#define bitFromOffset5(val) (((val) >> 5) & 0x1)
30+
#define bitFromOffset6(val) (((val) >> 6) & 0x1)
31+
#define bitFromOffset7(val) (((val) >> 7) & 0x1)
32+
#define twoBitsFromOffset0(val) ((val) & 0x3)
33+
#define twoBitsFromOffset6(val) (((val) >> 6) & 0x3)
34+
#define threeBitsFromOffset0(val) ((val) & 0x7)
35+
#define threeBitsFromOffset3(val) (((val) >> 3) & 0x7)
36+
#define fiveBitsFromOffset0(val) ((val) & 0x1f)
37+
#define invertedBitFromOffset3(val) (((~(val)) >> 3) & 0x1)
38+
#define invertedBitFromOffset4(val) (((~(val)) >> 4) & 0x1)
39+
#define invertedBitFromOffset5(val) (((~(val)) >> 5) & 0x1)
40+
#define invertedBitFromOffset6(val) (((~(val)) >> 6) & 0x1)
41+
#define invertedBitFromOffset7(val) (((~(val)) >> 7) & 0x1)
42+
#define invertedFourBitsFromOffset3(val) (((~(val)) >> 3) & 0xf)
43+
// ModR/M
44+
#define modFromModRM(modRM) twoBitsFromOffset6(modRM)
45+
#define regFromModRM(modRM) threeBitsFromOffset3(modRM)
46+
#define rmFromModRM(modRM) threeBitsFromOffset0(modRM)
47+
// SIB
48+
#define scaleFromSIB(sib) twoBitsFromOffset6(sib)
49+
#define indexFromSIB(sib) threeBitsFromOffset3(sib)
50+
#define baseFromSIB(sib) threeBitsFromOffset0(sib)
51+
// REX
52+
#define wFromREX(rex) bitFromOffset3(rex)
53+
#define rFromREX(rex) bitFromOffset2(rex)
54+
#define xFromREX(rex) bitFromOffset1(rex)
55+
#define bFromREX(rex) bitFromOffset0(rex)
56+
// XOP
57+
#define rFromXOP2of3(xop) invertedBitFromOffset7(xop)
58+
#define xFromXOP2of3(xop) invertedBitFromOffset6(xop)
59+
#define bFromXOP2of3(xop) invertedBitFromOffset5(xop)
60+
#define mmmmmFromXOP2of3(xop) fiveBitsFromOffset0(xop)
61+
#define wFromXOP3of3(xop) bitFromOffset7(xop)
62+
#define vvvvFromXOP3of3(xop) invertedFourBitsFromOffset3(xop)
63+
#define lFromXOP3of3(xop) bitFromOffset2(xop)
64+
#define ppFromXOP3of3(xop) twoBitsFromOffset0(xop)
65+
// VEX2
66+
#define rFromVEX2of2(vex) invertedBitFromOffset7(vex)
67+
#define vvvvFromVEX2of2(vex) invertedFourBitsFromOffset3(vex)
68+
#define lFromVEX2of2(vex) bitFromOffset2(vex)
69+
#define ppFromVEX2of2(vex) twoBitsFromOffset0(vex)
70+
// VEX3
71+
#define rFromVEX2of3(vex) invertedBitFromOffset7(vex)
72+
#define xFromVEX2of3(vex) invertedBitFromOffset6(vex)
73+
#define bFromVEX2of3(vex) invertedBitFromOffset5(vex)
74+
#define mmmmmFromVEX2of3(vex) fiveBitsFromOffset0(vex)
75+
#define wFromVEX3of3(vex) bitFromOffset7(vex)
76+
#define vvvvFromVEX3of3(vex) invertedFourBitsFromOffset3(vex)
77+
#define lFromVEX3of3(vex) bitFromOffset2(vex)
78+
#define ppFromVEX3of3(vex) twoBitsFromOffset0(vex)
79+
// EVEX
80+
#define rFromEVEX2of4(evex) invertedBitFromOffset7(evex)
81+
#define xFromEVEX2of4(evex) invertedBitFromOffset6(evex)
82+
#define bFromEVEX2of4(evex) invertedBitFromOffset5(evex)
83+
#define r2FromEVEX2of4(evex) invertedBitFromOffset4(evex)
84+
#define mmmFromEVEX2of4(evex) threeBitsFromOffset0(evex)
85+
#define wFromEVEX3of4(evex) bitFromOffset7(evex)
86+
#define vvvvFromEVEX3of4(evex) invertedFourBitsFromOffset3(evex)
87+
#define ppFromEVEX3of4(evex) twoBitsFromOffset0(evex)
88+
#define zFromEVEX4of4(evex) bitFromOffset7(evex)
89+
#define l2FromEVEX4of4(evex) bitFromOffset6(evex)
90+
#define lFromEVEX4of4(evex) bitFromOffset5(evex)
91+
#define bFromEVEX4of4(evex) bitFromOffset4(evex)
92+
#define v2FromEVEX4of4(evex) invertedBitFromOffset3(evex)
93+
#define aaaFromEVEX4of4(evex) threeBitsFromOffset0(evex)
7394

7495
// These enums represent Intel registers for use by the decoder.
7596
#define REGS_8BIT \

0 commit comments

Comments
 (0)