Skip to content

Commit b8e81b9

Browse files
authored
Merge pull request #16052 from ethereum/eof_source_locations_unoptimized_assembly
Collect instruction location info for EOF assembly
2 parents 19db669 + a5e20d2 commit b8e81b9

File tree

1 file changed

+120
-102
lines changed

1 file changed

+120
-102
lines changed

libevmasm/Assembly.cpp

+120-102
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,54 @@ using namespace solidity::evmasm;
5353
using namespace solidity::langutil;
5454
using namespace solidity::util;
5555

56+
namespace
57+
{
58+
59+
/// Produces instruction location info in RAII style. When an assembly instruction is added to the bytecode,
60+
/// this class can be instantiated in that scope. It will record the current bytecode size (before addition)
61+
/// and, at destruction time, record the new bytecode size. This information is then added to an external
62+
/// instruction locations vector.
63+
/// If the instruction decomposes into multiple individual evm instructions, `emit` can be
64+
/// called for all but the last one (which will be emitted by the destructor).
65+
class InstructionLocationEmitter
66+
{
67+
public:
68+
InstructionLocationEmitter(
69+
std::vector<LinkerObject::InstructionLocation>& _instructionLocations,
70+
bytes const& _bytecode,
71+
size_t const _assemblyItemIndex
72+
):
73+
m_instructionLocations(_instructionLocations),
74+
m_bytecode(_bytecode),
75+
m_assemblyItemIndex(_assemblyItemIndex),
76+
m_instructionLocationStart(_bytecode.size())
77+
{}
78+
79+
~InstructionLocationEmitter()
80+
{
81+
emit();
82+
}
83+
84+
void emit()
85+
{
86+
auto const end = m_bytecode.size();
87+
m_instructionLocations.push_back(LinkerObject::InstructionLocation{
88+
.start = m_instructionLocationStart,
89+
.end = end,
90+
.assemblyItemIndex = m_assemblyItemIndex
91+
});
92+
m_instructionLocationStart = end;
93+
}
94+
95+
private:
96+
std::vector<LinkerObject::InstructionLocation>& m_instructionLocations;
97+
bytes const& m_bytecode;
98+
size_t const m_assemblyItemIndex{};
99+
size_t m_instructionLocationStart{};
100+
};
101+
102+
}
103+
56104
std::map<std::string, std::shared_ptr<std::string const>> Assembly::s_sharedSourceNames;
57105

58106
AssemblyItem const& Assembly::append(AssemblyItem _i)
@@ -1281,104 +1329,72 @@ LinkerObject const& Assembly::assembleLegacy() const
12811329
uint8_t dataRefPush = static_cast<uint8_t>(pushInstruction(bytesPerDataRef));
12821330

12831331
LinkerObject::CodeSectionLocation codeSectionLocation;
1332+
codeSectionLocation.instructionLocations.reserve(items.size());
12841333
codeSectionLocation.start = 0;
1285-
size_t assemblyItemIndex = 0;
1286-
auto assembleInstruction = [&](auto&& _addInstruction) {
1287-
size_t start = ret.bytecode.size();
1288-
_addInstruction();
1289-
size_t end = ret.bytecode.size();
1290-
codeSectionLocation.instructionLocations.emplace_back(
1291-
LinkerObject::InstructionLocation{
1292-
.start = start,
1293-
.end = end,
1294-
.assemblyItemIndex = assemblyItemIndex
1295-
}
1296-
);
1297-
};
1298-
for (AssemblyItem const& item: items)
1334+
for (auto const& [assemblyItemIndex, item]: items | ranges::views::enumerate)
12991335
{
1336+
// collect instruction locations via side effects
1337+
InstructionLocationEmitter instructionLocationEmitter(codeSectionLocation.instructionLocations, ret.bytecode, assemblyItemIndex);
13001338
// store position of the invalid jump destination
13011339
if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits<size_t>::max())
13021340
m_tagPositionsInBytecode[0] = ret.bytecode.size();
13031341

13041342
switch (item.type())
13051343
{
13061344
case Operation:
1307-
assembleInstruction([&](){
1308-
ret.bytecode += assembleOperation(item);
1309-
});
1345+
ret.bytecode += assembleOperation(item);
13101346
break;
13111347
case Push:
1312-
assembleInstruction([&](){
1313-
ret.bytecode += assemblePush(item);
1314-
});
1348+
ret.bytecode += assemblePush(item);
13151349
break;
13161350
case PushTag:
1317-
{
1318-
assembleInstruction([&](){
1319-
ret.bytecode.push_back(tagPush);
1320-
tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1321-
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
1322-
});
1351+
ret.bytecode.push_back(tagPush);
1352+
tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1353+
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
13231354
break;
1324-
}
13251355
case PushData:
1326-
assembleInstruction([&]() {
1327-
ret.bytecode.push_back(dataRefPush);
1328-
dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1329-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1330-
});
1356+
ret.bytecode.push_back(dataRefPush);
1357+
dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1358+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13311359
break;
13321360
case PushSub:
1333-
assembleInstruction([&]() {
1334-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1335-
ret.bytecode.push_back(dataRefPush);
1336-
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1337-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1338-
});
1361+
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1362+
ret.bytecode.push_back(dataRefPush);
1363+
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1364+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13391365
break;
13401366
case PushSubSize:
13411367
{
1342-
assembleInstruction([&](){
1343-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1344-
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1345-
item.setPushedValue(u256(s));
1346-
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1347-
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1348-
ret.bytecode.resize(ret.bytecode.size() + b);
1349-
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1350-
toBigEndian(s, byr);
1351-
});
1368+
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1369+
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1370+
item.setPushedValue(u256(s));
1371+
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1372+
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1373+
ret.bytecode.resize(ret.bytecode.size() + b);
1374+
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1375+
toBigEndian(s, byr);
13521376
break;
13531377
}
13541378
case PushProgramSize:
1355-
{
1356-
assembleInstruction([&](){
1357-
ret.bytecode.push_back(dataRefPush);
1358-
sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1359-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1360-
});
1379+
ret.bytecode.push_back(dataRefPush);
1380+
sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1381+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13611382
break;
1362-
}
13631383
case PushLibraryAddress:
13641384
{
1365-
assembleInstruction([&]() {
1366-
auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1367-
ret.bytecode += bytecode;
1368-
ret.linkReferences.insert(linkRef);
1369-
});
1385+
auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1386+
ret.bytecode += bytecode;
1387+
ret.linkReferences.insert(linkRef);
13701388
break;
13711389
}
13721390
case PushImmutable:
1373-
assembleInstruction([&]() {
1374-
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1375-
// Maps keccak back to the "identifier" std::string of that immutable.
1376-
ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1377-
// Record the bytecode offset of the PUSH32 argument.
1378-
ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1379-
// Advance bytecode by 32 bytes (default initialized).
1380-
ret.bytecode.resize(ret.bytecode.size() + 32);
1381-
});
1391+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1392+
// Maps keccak back to the "identifier" std::string of that immutable.
1393+
ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1394+
// Record the bytecode offset of the PUSH32 argument.
1395+
ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1396+
// Advance bytecode by 32 bytes (default initialized).
1397+
ret.bytecode.resize(ret.bytecode.size() + 32);
13821398
break;
13831399
case VerbatimBytecode:
13841400
ret.bytecode += assembleVerbatimBytecode(item);
@@ -1391,53 +1407,41 @@ LinkerObject const& Assembly::assembleLegacy() const
13911407
{
13921408
if (i != offsets.size() - 1)
13931409
{
1394-
assembleInstruction([&]() {
1395-
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1396-
});
1397-
assembleInstruction([&]() {
1398-
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1399-
});
1410+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2));
1411+
// This item type decomposes into multiple evm instructions, so we manually call emit()
1412+
instructionLocationEmitter.emit();
1413+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2));
1414+
instructionLocationEmitter.emit();
14001415
}
1401-
assembleInstruction([&]() {
1402-
// TODO: should we make use of the constant optimizer methods for pushing the offsets?
1403-
bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1404-
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1405-
ret.bytecode += offsetBytes;
1406-
});
1407-
assembleInstruction([&]() {
1408-
ret.bytecode.push_back(uint8_t(Instruction::ADD));
1409-
});
1410-
assembleInstruction([&]() {
1411-
ret.bytecode.push_back(uint8_t(Instruction::MSTORE));
1412-
});
1416+
// TODO: should we make use of the constant optimizer methods for pushing the offsets?
1417+
bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1418+
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1419+
ret.bytecode += offsetBytes;
1420+
instructionLocationEmitter.emit();
1421+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::ADD));
1422+
instructionLocationEmitter.emit();
1423+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::MSTORE));
1424+
// No emit needed here, it's taken care of by the destructor of instructionLocationEmitter.
14131425
}
14141426
if (offsets.empty())
14151427
{
1416-
assembleInstruction([&]() {
1417-
ret.bytecode.push_back(uint8_t(Instruction::POP));
1418-
});
1419-
assembleInstruction([&]() {
1420-
ret.bytecode.push_back(uint8_t(Instruction::POP));
1421-
});
1428+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP));
1429+
instructionLocationEmitter.emit();
1430+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP));
1431+
// no emit needed here, it's taken care of by the destructor of instructionLocationEmitter
14221432
}
14231433
immutableReferencesBySub.erase(item.data());
14241434
break;
14251435
}
14261436
case PushDeployTimeAddress:
1427-
assembleInstruction([&]() {
1428-
ret.bytecode += assemblePushDeployTimeAddress();
1429-
});
1437+
ret.bytecode += assemblePushDeployTimeAddress();
14301438
break;
14311439
case Tag:
1432-
assembleInstruction([&](){
1433-
ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
1434-
});
1440+
ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
14351441
break;
14361442
default:
14371443
solAssert(false, "Unexpected opcode while assembling.");
14381444
}
1439-
1440-
++assemblyItemIndex;
14411445
}
14421446

14431447
codeSectionLocation.end = ret.bytecode.size();
@@ -1606,9 +1610,17 @@ LinkerObject const& Assembly::assembleEOF() const
16061610
for (auto&& [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate)
16071611
{
16081612
auto const sectionStart = ret.bytecode.size();
1613+
1614+
std::vector<LinkerObject::InstructionLocation> instructionLocations;
1615+
instructionLocations.reserve(codeSection.items.size());
1616+
16091617
solAssert(!codeSection.items.empty(), "Empty code section.");
1610-
for (AssemblyItem const& item: codeSection.items)
1618+
1619+
for (auto const& [assemblyItemIndex, item]: codeSection.items | ranges::views::enumerate)
16111620
{
1621+
// collect instruction locations via side effects
1622+
InstructionLocationEmitter instructionLocationEmitter {instructionLocations, ret.bytecode, assemblyItemIndex};
1623+
16121624
// store position of the invalid jump destination
16131625
if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits<size_t>::max())
16141626
m_tagPositionsInBytecode[0] = ret.bytecode.size();
@@ -1724,6 +1736,12 @@ LinkerObject const& Assembly::assembleEOF() const
17241736
"Code section too large for EOF."
17251737
);
17261738
setBigEndianUint16(ret.bytecode, codeSectionSizePositions[codeSectionIndex], ret.bytecode.size() - sectionStart);
1739+
1740+
ret.codeSectionLocations.push_back(LinkerObject::CodeSectionLocation{
1741+
.start = sectionStart,
1742+
.end = ret.bytecode.size(),
1743+
.instructionLocations = std::move(instructionLocations)
1744+
});
17271745
}
17281746

17291747
for (auto const& [refPos, tagId]: tagRef)

0 commit comments

Comments
 (0)