Skip to content

Commit ee6cd11

Browse files
authored
Merge branch 'develop' into patch-fixed_docs
2 parents 34494e4 + d0bae61 commit ee6cd11

File tree

9 files changed

+504
-19
lines changed

9 files changed

+504
-19
lines changed

libevmasm/AssemblyItem.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,10 @@ class AssemblyItem
8585
m_type(Operation),
8686
m_instruction(_i),
8787
m_debugData(std::move(_debugData))
88-
{}
88+
{
89+
solAssert(_i != Instruction::SWAPN, "Construct via AssemblyItem::swapN");
90+
solAssert(_i != Instruction::DUPN, "Construct via AssemblyItem::dupN");
91+
}
8992
AssemblyItem(AssemblyItemType _type, u256 _data = 0, langutil::DebugData::ConstPtr _debugData = langutil::DebugData::create()):
9093
m_type(_type),
9194
m_debugData(std::move(_debugData))

test/libevmasm/PlainAssemblyParser.cpp

Lines changed: 90 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@
2828

2929
#include <fmt/format.h>
3030

31-
#include <sstream>
32-
3331
using namespace std::string_literals;
3432
using namespace solidity;
3533
using namespace solidity::test;
@@ -39,16 +37,46 @@ using namespace solidity::langutil;
3937

4038
Json PlainAssemblyParser::parse(std::string _sourceName, std::string const& _source)
4139
{
40+
m_sourceStream = std::istringstream(_source);
4241
m_sourceName = std::move(_sourceName);
43-
Json codeJSON = Json::array();
44-
std::istringstream sourceStream(_source);
45-
while (getline(sourceStream, m_line))
42+
m_lineNumber = 0;
43+
44+
advanceLine();
45+
return parseAssembly(0);
46+
}
47+
48+
Json PlainAssemblyParser::parseAssembly(size_t _nestingLevel)
49+
{
50+
Json assemblyJSON = {{".code", Json::array()}};
51+
Json& codeJSON = assemblyJSON[".code"];
52+
53+
while (m_line.has_value())
4654
{
47-
advanceLine(m_line);
4855
if (m_lineTokens.empty())
56+
{
57+
advanceLine();
58+
continue;
59+
}
60+
61+
size_t newLevel = parseNestingLevel();
62+
if (newLevel > _nestingLevel)
63+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Indentation does not match the current subassembly nesting level.")));
64+
65+
if (newLevel < _nestingLevel)
66+
return assemblyJSON;
67+
68+
if (currentToken().value == ".sub")
69+
{
70+
advanceLine();
71+
72+
std::string nextDataIndex = std::to_string(assemblyJSON[".data"].size());
73+
assemblyJSON[".data"][nextDataIndex] = parseAssembly(_nestingLevel + 1);
4974
continue;
75+
}
76+
else if (assemblyJSON.contains(".data"))
77+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("The code of an assembly must be specified before its subassemblies.")));
5078

51-
if (c_instructions.contains(currentToken().value))
79+
if (c_instructions.contains(currentToken().value) || currentToken().value == "PUSHSIZE")
5280
{
5381
expectNoMoreArguments();
5482
codeJSON.push_back({{"name", currentToken().value}});
@@ -62,6 +90,19 @@ Json PlainAssemblyParser::parse(std::string _sourceName, std::string const& _sou
6290
expectNoMoreArguments();
6391
codeJSON.push_back({{"name", "PUSH [tag]"}, {"value", tagID}});
6492
}
93+
else if (hasMoreTokens() && (nextToken().value == "[$]" || nextToken().value == "#[$]"))
94+
{
95+
std::string pushType = std::string(nextToken().value);
96+
advanceToken();
97+
std::string_view subassemblyID = expectArgument();
98+
expectNoMoreArguments();
99+
100+
if (!subassemblyID.starts_with("0x"))
101+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("The subassembly ID must be a hex number prefixed with '0x'.")));
102+
103+
subassemblyID.remove_prefix("0x"s.size());
104+
codeJSON.push_back({{"name", "PUSH " + pushType}, {"value", subassemblyID}});
105+
}
65106
else
66107
{
67108
std::string_view immediateArgument = expectArgument();
@@ -84,8 +125,24 @@ Json PlainAssemblyParser::parse(std::string _sourceName, std::string const& _sou
84125
}
85126
else
86127
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Unknown instruction.")));
128+
129+
advanceLine();
87130
}
88-
return {{".code", codeJSON}};
131+
132+
return assemblyJSON;
133+
}
134+
135+
/// Determines the nesting level of the current line from its indentation.
/// The indentation is whatever indentation() returns for the current line.
/// Throws std::runtime_error (via BOOST_THROW_EXCEPTION) if the indentation contains
/// any character other than a space or if its length is not a multiple of 4.
/// @returns The length of the indentation divided by 4.
size_t PlainAssemblyParser::parseNestingLevel() const
136+
{
137+
std::string_view indentationString = indentation();
138+
139+
// Comparing against an all-space string of the same length rejects tabs and any other characters.
if (indentationString != std::string(indentationString.size(), ' '))
140+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Non-space characters used for indentation.")));
141+
142+
// A partial indentation level (e.g. 2 or 6 spaces) is an error rather than being rounded.
if (indentationString.size() % 4 != 0)
143+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Each indentation level must consist of 4 spaces.")));
144+
145+
return indentationString.size() / 4;
89146
}
90147

91148
PlainAssemblyParser::Token const& PlainAssemblyParser::currentToken() const
@@ -100,6 +157,16 @@ PlainAssemblyParser::Token const& PlainAssemblyParser::nextToken() const
100157
return m_lineTokens[m_tokenIndex + 1];
101158
}
102159

160+
/// Returns the leading part of the current line that precedes its first token.
/// If the line has no tokens, the whole line is returned.
/// Asserts that there is a current line (m_line has a value).
/// The returned view refers to the contents of m_line.
std::string_view PlainAssemblyParser::indentation() const
161+
{
162+
soltestAssert(m_line.has_value());
163+
164+
// No tokens on the line: everything on it counts as indentation.
if (m_lineTokens.empty())
165+
return *m_line;
166+
167+
// The position of the first token marks where the indentation ends.
return std::string_view(*m_line).substr(0, m_lineTokens.at(0).position);
168+
}
169+
103170
bool PlainAssemblyParser::advanceToken()
104171
{
105172
if (!hasMoreTokens())
@@ -125,12 +192,20 @@ void PlainAssemblyParser::expectNoMoreArguments()
125192
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Too many arguments.")));
126193
}
127194

128-
void PlainAssemblyParser::advanceLine(std::string_view _line)
195+
bool PlainAssemblyParser::advanceLine()
129196
{
197+
std::string line;
198+
if (!getline(m_sourceStream, line))
199+
{
200+
m_line = std::nullopt;
201+
return false;
202+
}
203+
130204
++m_lineNumber;
131-
m_line = _line;
132-
m_lineTokens = tokenizeLine(m_line);
205+
m_line = std::move(line);
206+
m_lineTokens = tokenizeLine(*m_line);
133207
m_tokenIndex = 0;
208+
return true;
134209
}
135210

136211
std::vector<PlainAssemblyParser::Token> PlainAssemblyParser::tokenizeLine(std::string_view _line)
@@ -162,6 +237,9 @@ std::vector<PlainAssemblyParser::Token> PlainAssemblyParser::tokenizeLine(std::s
162237

163238
std::string PlainAssemblyParser::formatError(std::string_view _message) const
164239
{
240+
soltestAssert(m_line.has_value());
241+
soltestAssert(!m_lineTokens.empty());
242+
165243
std::string lineNumberString = std::to_string(m_lineNumber);
166244
std::string padding(lineNumberString.size(), ' ');
167245
std::string underline = std::string(currentToken().position, ' ') + std::string(currentToken().value.size(), '^');
@@ -174,7 +252,7 @@ std::string PlainAssemblyParser::formatError(std::string_view _message) const
174252
_message,
175253
padding, m_sourceName,
176254
padding,
177-
m_lineNumber, m_line,
255+
m_lineNumber, *m_line,
178256
padding, underline
179257
);
180258
}

test/libevmasm/PlainAssemblyParser.h

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include <libsolutil/JSON.h>
2222

23+
#include <sstream>
2324
#include <string>
2425
#include <string_view>
2526
#include <vector>
@@ -36,12 +37,20 @@ namespace solidity::evmasm::test
3637
/// - A non-empty line represents a single assembly item.
3738
/// - The name of the item is the first thing on the line and may consist of one or more tokens.
3839
/// - Zero or more arguments follow the name.
40+
/// - Indentation determines assembly nesting level (4 spaces per level).
41+
/// - A new subassembly starts with '.sub' and contains all subsequent lines at a higher nesting level.
42+
/// The first line at the same or lower nesting level ends the subassembly.
43+
/// - Subassemblies can be nested to arbitrary depth.
44+
/// - The code of an assembly must be specified before its subassemblies.
3945
///
4046
/// Supported items:
4147
/// - All instruction names.
4248
/// - PUSH <hex value>
4349
/// - PUSH [tag] <tagID>
4450
/// - tag <tagID>
51+
/// - PUSH [$] <subassemblyID>
52+
/// - PUSH #[$] <subassemblyID>
53+
/// - .sub
4554
class PlainAssemblyParser
4655
{
4756
public:
@@ -56,24 +65,30 @@ class PlainAssemblyParser
5665
size_t position; ///< Position of the first character of the token within m_line.
5766
};
5867

68+
Json parseAssembly(size_t _nestingLevel);
69+
size_t parseNestingLevel() const;
70+
5971
Token const& currentToken() const;
6072
Token const& nextToken() const;
6173
bool hasMoreTokens() const { return m_tokenIndex + 1 < m_lineTokens.size(); }
6274

75+
std::string_view indentation() const;
76+
6377
bool advanceToken();
6478
std::string_view expectArgument();
6579
void expectNoMoreArguments();
66-
void advanceLine(std::string_view _line);
80+
bool advanceLine();
6781

6882
static std::vector<Token> tokenizeLine(std::string_view _line);
6983
std::string formatError(std::string_view _message) const;
7084

7185
private:
72-
std::string m_sourceName; ///< Name of the file the source comes from.
73-
size_t m_lineNumber = 0; ///< The number of the current line within the source, 1-based.
74-
std::string m_line; ///< The current line, unparsed.
75-
std::vector<Token> m_lineTokens; ///< Decomposition of the current line into tokens (does not include comments).
76-
size_t m_tokenIndex = 0; ///< Points at a token within m_lineTokens.
86+
std::istringstream m_sourceStream; ///< The source code being parsed.
87+
std::string m_sourceName; ///< Name of the file the source comes from.
88+
size_t m_lineNumber = 0; ///< The number of the current line within the source, 1-based.
89+
std::optional<std::string> m_line; ///< The current line, unparsed.
90+
std::vector<Token> m_lineTokens; ///< Decomposition of the current line into tokens (does not include comments).
91+
size_t m_tokenIndex = 0; ///< Points at a token within m_lineTokens.
7792
};
7893

7994
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// ====
2+
// outputs: InputAssemblyJSON,Assembly,Bytecode,Opcodes,SourceMappings
3+
// ----
4+
// InputAssemblyJSON: {
5+
// ".code": []
6+
// }
7+
// Assembly:
8+
// Bytecode:
9+
// Opcodes:
10+
// SourceMappings:
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
PUSHSIZE
2+
3+
.sub
4+
PUSHSIZE
5+
// ====
6+
// outputs: InputAssemblyJSON,Assembly,Bytecode,Opcodes,SourceMappings
7+
// ----
8+
// InputAssemblyJSON: {
9+
// ".code": [
10+
// {
11+
// "name": "PUSHSIZE"
12+
// }
13+
// ],
14+
// ".data": {
15+
// "0": {
16+
// ".code": [
17+
// {
18+
// "name": "PUSHSIZE"
19+
// }
20+
// ]
21+
// }
22+
// }
23+
// }
24+
// Assembly:
25+
// bytecodeSize
26+
// stop
27+
//
28+
// sub_0: assembly {
29+
// bytecodeSize
30+
// }
31+
// Bytecode: 6003fe
32+
// Opcodes: PUSH1 0x3 INVALID
33+
// SourceMappings: :::-:0
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
PUSH [$] 0x0000
2+
PUSH #[$] 0x0000
3+
4+
.sub
5+
PUSH [$] 0x0
6+
PUSH #[$] 0x2
7+
8+
.sub
9+
.sub
10+
.sub
11+
// ====
12+
// outputs: InputAssemblyJSON,Assembly,Bytecode,Opcodes,SourceMappings
13+
// ----
14+
// InputAssemblyJSON: {
15+
// ".code": [
16+
// {
17+
// "name": "PUSH [$]",
18+
// "value": "0000"
19+
// },
20+
// {
21+
// "name": "PUSH #[$]",
22+
// "value": "0000"
23+
// }
24+
// ],
25+
// ".data": {
26+
// "0": {
27+
// ".code": [
28+
// {
29+
// "name": "PUSH [$]",
30+
// "value": "0"
31+
// },
32+
// {
33+
// "name": "PUSH #[$]",
34+
// "value": "2"
35+
// }
36+
// ],
37+
// ".data": {
38+
// "0": {
39+
// ".code": []
40+
// },
41+
// "1": {
42+
// ".code": []
43+
// },
44+
// "2": {
45+
// ".code": []
46+
// }
47+
// }
48+
// }
49+
// }
50+
// }
51+
// Assembly:
52+
// dataOffset(sub_0)
53+
// dataSize(sub_0)
54+
// stop
55+
//
56+
// sub_0: assembly {
57+
// dataOffset(sub_0)
58+
// dataSize(sub_2)
59+
// stop
60+
//
61+
// sub_0: assembly {
62+
// }
63+
//
64+
// sub_1: assembly {
65+
// }
66+
//
67+
// sub_2: assembly {
68+
// }
69+
// }
70+
// Bytecode: 60056005fe60056000fe
71+
// Opcodes: PUSH1 0x5 PUSH1 0x5 INVALID PUSH1 0x5 PUSH1 0x0 INVALID
72+
// SourceMappings: :::-:0;

0 commit comments

Comments
 (0)