Skip to content

Commit c60bd40

Browse files
nvjleLucasSte
authored andcommitted
[SOL] Reworked the SBF textual assembly syntax to match the rbpf-style syntax. (#54)
- Update the syntax of every instruction in SBFInstrInfo.td (currently using the asm variants feature to temporarily support both). - Update AsmParser for the new syntax (add new operand, memory, instruction, and directive parse routines). - Add error checking for unresolved 16-bit branch relocations and emit error message for graceful exit (the old BPF back-end crashes) and corresponding lit unit test. - Add new lit unit tests in MC/SBF and MC/Disassembler/SBF to cover disassembly, object emission, and parsing of every single instruction. This is more extensive coverage than existed previously. - Remaster all CodeGen/SBF unit tests accordingly. - A minor TableGen patch was needed to support asm strings containing '|' within variant strings ('|' happens to be the variant separator). The patch is a bit more complex than it otherwise might be in that we currently support both syntaxes to ease the verification (e.g., being able to see and compare each instruction and object code side-by-side within each unit test). After some 'soak time' for the new functionality, I intend to remove the old syntax altogether and otherwise clean-up. We'll also remove the TableGen patch at that time.
1 parent 581e888 commit c60bd40

File tree

103 files changed

+2872
-753
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

103 files changed

+2872
-753
lines changed

llvm/lib/Target/SBF/AsmParser/SBFAsmParser.cpp

+152-11
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "MCTargetDesc/SBFMCTargetDesc.h"
10+
#include "MCTargetDesc/SBFInstPrinter.h"
1011
#include "TargetInfo/SBFTargetInfo.h"
1112
#include "llvm/ADT/STLExtras.h"
1213
#include "llvm/ADT/StringSwitch.h"
@@ -31,6 +32,10 @@ class SBFAsmParser : public MCTargetAsmParser {
3132

3233
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
3334

35+
bool isNewSyntax() {
36+
return getParser().getAssemblerDialect() == 0;
37+
}
38+
3439
bool PreMatchCheck(OperandVector &Operands);
3540

3641
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -45,21 +50,26 @@ class SBFAsmParser : public MCTargetAsmParser {
4550
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
4651
SMLoc NameLoc, OperandVector &Operands) override;
4752

53+
bool parseOldInstruction(ParseInstructionInfo &Info, StringRef Name,
54+
SMLoc NameLoc, OperandVector &Operands);
55+
4856
bool ParseDirective(AsmToken DirectiveID) override;
4957

50-
// "=" is used as assignment operator for assembly statment, so can't be used
51-
// for symbol assignment.
52-
bool equalIsAsmAssignment() override { return false; }
58+
// "=" is used as assignment operator for assembly statement, so can't be used
59+
// for symbol assignment (old syntax only).
60+
bool equalIsAsmAssignment() override { return isNewSyntax(); }
5361
// "*" is used for dereferencing memory that it will be the start of
54-
// statement.
55-
bool starIsStartOfStatement() override { return true; }
62+
// statement (old syntax only).
63+
bool starIsStartOfStatement() override { return !isNewSyntax(); }
5664

5765
#define GET_ASSEMBLER_HEADER
5866
#include "SBFGenAsmMatcher.inc"
5967

68+
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
6069
OperandMatchResultTy parseImmediate(OperandVector &Operands);
6170
OperandMatchResultTy parseRegister(OperandVector &Operands);
6271
OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);
72+
OperandMatchResultTy parseMemOperand(OperandVector &Operands);
6373

6474
public:
6575
enum SBFMatchResultTy {
@@ -161,13 +171,20 @@ struct SBFOperand : public MCParsedAsmOperand {
161171
}
162172

163173
void print(raw_ostream &OS) const override {
174+
auto RegName = [](unsigned Reg) {
175+
if (Reg)
176+
return SBFInstPrinter::getRegisterName(Reg);
177+
else
178+
return "noreg";
179+
};
180+
164181
switch (Kind) {
165182
case Immediate:
166183
OS << *getImm();
167184
break;
168185
case Register:
169-
OS << "<register x";
170-
OS << getReg() << ">";
186+
OS << "<register ";
187+
OS << RegName(getReg()) << ">";
171188
break;
172189
case Token:
173190
OS << "'" << getToken() << "'";
@@ -263,6 +280,10 @@ struct SBFOperand : public MCParsedAsmOperand {
263280

264281
bool SBFAsmParser::PreMatchCheck(OperandVector &Operands) {
265282

283+
// These checks not needed for the new syntax.
284+
if (isNewSyntax())
285+
return false;
286+
266287
if (Operands.size() == 4) {
267288
// check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
268289
// reg1 must be the same as reg2
@@ -293,7 +314,9 @@ bool SBFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
293314
if (PreMatchCheck(Operands))
294315
return Error(IDLoc, "additional inst constraint not met");
295316

296-
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
317+
unsigned Dialect = getParser().getAssemblerDialect();
318+
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm,
319+
Dialect)) {
297320
default:
298321
break;
299322
case Match_Success:
@@ -349,6 +372,9 @@ OperandMatchResultTy SBFAsmParser::tryParseRegister(unsigned &RegNo,
349372

350373
OperandMatchResultTy
351374
SBFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
375+
if (isNewSyntax())
376+
llvm_unreachable("parseOperandAsOperator called for new syntax");
377+
352378
SMLoc S = getLoc();
353379

354380
if (getLexer().getKind() == AsmToken::Identifier) {
@@ -458,10 +484,108 @@ OperandMatchResultTy SBFAsmParser::parseImmediate(OperandVector &Operands) {
458484
return MatchOperand_Success;
459485
}
460486

461-
/// ParseInstruction - Parse an SBF instruction which is in SBF verifier
462-
/// format.
487+
/// Parse a new-syntax memory operand of the form "[reg+/-offset]".
///
/// On success the "[" token, whatever parseRegister and parseImmediate push
/// for the base register and offset, and the "]" token are appended to
/// \p Operands, in that order.
///
/// \returns MatchOperand_Success when the whole operand was parsed.
/// MatchOperand_ParseFail is returned silently when the current token is not
/// '['; every failure after the '[' has been consumed reports an error first.
OperandMatchResultTy SBFAsmParser::parseMemOperand(OperandVector &Operands) {
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  // getLoc() reports the *current* token, so the bracket's location has to be
  // captured before lexing past it; otherwise the "[" token would carry the
  // location of the token that follows it.
  SMLoc LBracLoc = getLoc();
  getParser().Lex(); // Eat '['.
  Operands.push_back(SBFOperand::createToken("[", LBracLoc));

  if (parseRegister(Operands) != MatchOperand_Success) {
    Error(getLoc(), "expected register");
    return MatchOperand_ParseFail;
  }

  if (parseImmediate(Operands) != MatchOperand_Success) {
    Error(getLoc(), "expected immediate offset");
    return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac)) {
    Error(getLoc(), "expected ']'");
    return MatchOperand_ParseFail;
  }

  // Same reasoning as for '[': take the location before consuming the token.
  SMLoc RBracLoc = getLoc();
  getParser().Lex(); // Eat ']'.
  Operands.push_back(SBFOperand::createToken("]", RBracLoc));

  return MatchOperand_Success;
}
515+
516+
/// Looks at a token type and creates the relevant operand from this
517+
/// information, adding to Operands. If operand was parsed, returns false, else
518+
/// true.
519+
bool SBFAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
520+
if (!isNewSyntax())
521+
llvm_unreachable("parseOperand called for old syntax");
522+
523+
// Attempt to parse token as a register.
524+
if (parseRegister(Operands) == MatchOperand_Success)
525+
return false;
526+
527+
// Attempt to parse token as an immediate.
528+
if (parseImmediate(Operands) == MatchOperand_Success) {
529+
return false;
530+
}
531+
532+
// Attempt to parse token sequence as a memory operand ("[reg+/-offset]").
533+
if (parseMemOperand(Operands) == MatchOperand_Success) {
534+
return false;
535+
}
536+
537+
// Finally we have exhausted all options and must declare defeat.
538+
Error(getLoc(), "unknown operand");
539+
return true;
540+
}
541+
542+
/// Parse an SBF instruction.
463543
bool SBFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
464544
SMLoc NameLoc, OperandVector &Operands) {
545+
if (!isNewSyntax()) {
546+
return parseOldInstruction(Info, Name, NameLoc, Operands);
547+
}
548+
549+
// First operand is token for instruction mnemonic.
550+
Operands.push_back(SBFOperand::createToken(Name, NameLoc));
551+
552+
// If there are no more operands, then finish.
553+
if (getLexer().is(AsmToken::EndOfStatement)) {
554+
getParser().Lex(); // Consume the EndOfStatement.
555+
return false;
556+
}
557+
558+
// Parse first operand.
559+
if (parseOperand(Operands, Name))
560+
return true;
561+
562+
// Parse until end of statement, consuming commas between operands.
563+
while (getLexer().is(AsmToken::Comma)) {
564+
// Consume comma token.
565+
getLexer().Lex();
566+
567+
// Parse next operand.
568+
if (parseOperand(Operands, Name))
569+
return true;
570+
}
571+
572+
if (getLexer().isNot(AsmToken::EndOfStatement)) {
573+
SMLoc Loc = getLexer().getLoc();
574+
getParser().eatToEndOfStatement();
575+
return Error(Loc, "unexpected token");
576+
}
577+
578+
getParser().Lex(); // Consume the EndOfStatement.
579+
return false;
580+
}
581+
582+
/// Parse an SBF instruction which is in SBF verifier format (old syntax).
583+
bool SBFAsmParser::parseOldInstruction(ParseInstructionInfo &Info,
584+
StringRef Name, SMLoc NameLoc,
585+
OperandVector &Operands) {
586+
if (isNewSyntax())
587+
llvm_unreachable("parseOldInstruction called for new syntax");
588+
465589
// The first operand could be either register or actually an operator.
466590
unsigned RegNo = MatchRegisterName(Name);
467591

@@ -502,7 +626,24 @@ bool SBFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
502626
return false;
503627
}
504628

505-
bool SBFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
629+
/// Handle the target-specific directives ".syntax_old" and ".syntax_new",
/// which switch the assembler dialect to 1 and 0 respectively.
///
/// \returns false when the directive was recognized here, and true otherwise
/// so that the generic parser gets a chance at recognizing it.
bool SBFAsmParser::ParseDirective(AsmToken DirectiveID) {
  const StringRef Directive = DirectiveID.getString();

  const bool SelectsOld = Directive == ".syntax_old";
  if (!SelectsOld && Directive != ".syntax_new")
    return true; // Not one of ours.

  getParser().setAssemblerDialect(SelectsOld ? 1 : 0);
  return false;
}
506647

507648
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSBFAsmParser() {
508649
RegisterMCAsmParser<SBFAsmParser> XX(getTheSBFXTarget());

llvm/lib/Target/SBF/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ set(LLVM_TARGET_DEFINITIONS SBF.td)
44

55
tablegen(LLVM SBFGenAsmMatcher.inc -gen-asm-matcher)
66
tablegen(LLVM SBFGenAsmWriter.inc -gen-asm-writer)
7+
tablegen(LLVM SBFGenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
78
tablegen(LLVM SBFGenCallingConv.inc -gen-callingconv)
89
tablegen(LLVM SBFGenDAGISel.inc -gen-dag-isel)
910
tablegen(LLVM SBFGenDisassemblerTables.inc -gen-disassembler)

llvm/lib/Target/SBF/MCTargetDesc/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ add_llvm_component_library(LLVMSBFDesc
22
SBFMCTargetDesc.cpp
33
SBFAsmBackend.cpp
44
SBFInstPrinter.cpp
5+
SBFMCAsmInfo.cpp
56
SBFMCCodeEmitter.cpp
67
SBFELFObjectWriter.cpp
78

llvm/lib/Target/SBF/MCTargetDesc/SBFELFObjectWriter.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "MCTargetDesc/SBFMCTargetDesc.h"
1010
#include "llvm/BinaryFormat/ELF.h"
11+
#include "llvm/MC/MCContext.h"
1112
#include "llvm/MC/MCELFObjectWriter.h"
1213
#include "llvm/MC/MCFixup.h"
1314
#include "llvm/MC/MCObjectWriter.h"
@@ -65,6 +66,10 @@ unsigned SBFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
6566
case FK_PCRel_4:
6667
// CALL instruction.
6768
return ELF::R_SBF_64_32;
69+
case FK_PCRel_2:
70+
// Branch instruction.
71+
Ctx.reportError(Fixup.getLoc(), "2-byte relocations not supported");
72+
return ELF::R_SBF_NONE;
6873
case FK_Data_8:
6974
return (isSolana && !relocAbs64) ? ELF::R_SBF_64_64 : ELF::R_SBF_64_ABS64;
7075
case FK_Data_4:

llvm/lib/Target/SBF/MCTargetDesc/SBFInstPrinter.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ using namespace llvm;
2323

2424
// Include the auto-generated portion of the assembly writer.
2525
#include "SBFGenAsmWriter.inc"
26+
#include "SBFGenAsmWriter1.inc"
2627

2728
void SBFInstPrinter::printInst(const MCInst *MI, uint64_t Address,
2829
StringRef Annot, const MCSubtargetInfo &STI,

llvm/lib/Target/SBF/MCTargetDesc/SBFInstPrinter.h

+21
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,27 @@ class SBFInstPrinter : public MCInstPrinter {
3636
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
3737
static const char *getRegisterName(unsigned RegNo);
3838
};
39+
40+
class MachineInstr;
41+
42+
class SBFLegacyInstPrinter : public SBFInstPrinter {
43+
public:
44+
SBFLegacyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
45+
const MCRegisterInfo &MRI)
46+
: SBFInstPrinter(MAI, MII, MRI) {}
47+
48+
void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
49+
const MCSubtargetInfo &STI, raw_ostream &O) override {
50+
printInstruction(MI, Address, O);
51+
printAnnotation(O, Annot);
52+
}
53+
54+
// Autogenerated by tblgen.
55+
std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
56+
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
57+
static const char *getRegisterName(unsigned RegNo);
58+
};
59+
3960
}
4061

4162
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//===-- SBFMCAsmInfo.cpp - SBF Asm properties -----------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file contains the definitions of the SBFMCAsmInfo properties.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "SBFMCAsmInfo.h"
14+
#include "llvm/BinaryFormat/Dwarf.h"
15+
#include "llvm/MC/MCStreamer.h"
16+
#include "llvm/Support/CommandLine.h"
17+
18+
using namespace llvm;
19+
20+
cl::opt<unsigned> SBFAsmWriterVariant(
21+
"sbf-output-asm-variant", cl::Hidden, cl::init(0),
22+
cl::desc("Choose output assembly variant (0 = sbf[default], 1 = legacy)"));
23+
24+
/// Set up the MC assembly properties for SBF. Neither \p TT nor \p Options is
/// consulted; the output dialect is taken from the -sbf-output-asm-variant
/// option.
SBFMCAsmInfo::SBFMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) {
  // Output assembly variant: 0 = sbf (default), 1 = legacy.
  AssemblerDialect = SBFAsmWriterVariant;

  // Symbol prefix and directive spellings.
  PrivateGlobalPrefix = ".L";
  WeakRefDirective = "\t.weak\t";

  // Directive support.
  UsesELFSectionDirectiveForBSS = true;
  HasSingleParameterDotFile = true;
  HasDotTypeDotSizeDirective = true;

  // Debug and unwind information.
  SupportsDebugInformation = true;
  ExceptionsType = ExceptionHandling::DwarfCFI;

  MinInstAlignment = 8;

  // The default pointer size is 4, and the value only affects DWARF ELF
  // output. If it is left wrong, the DWARF data ends up off by 4 bytes in
  // random places: the .debug_line section still parses, but with odd
  // offsets, line numbers, etc.
  CodePointerSize = 8;

  UseIntegratedAssembler = false;
}

0 commit comments

Comments
 (0)